Exemple #1
0
class TFDataTest(DataTest, unittest.TestCase):
    """Saver/loader tests for the ``matorage.tensorflow`` ``Dataset``.

    Several tests call other ``test_*`` methods directly as fixtures. Those
    calls populate ``self.data_config`` and ``self.data_saver`` (and, for the
    loader tests, ``self.dataset``) for the calling test to use.
    """

    @staticmethod
    def _image_target_attributes():
        """Return new ``image``/``target`` attribute definitions."""
        # NOTE: ``(1)`` is the integer 1, not a one-element tuple; it is kept
        # exactly as these tests have always passed it.
        return [
            DataAttribute("image", "uint8", (2, 2), itemsize=32),
            DataAttribute("target", "uint8", (1), itemsize=32),
        ]

    @staticmethod
    def _sample_batch():
        """Return the two-sample ``image``/``target`` batch used by the tests."""
        return {
            "image": np.asarray([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]),
            "target": np.asarray([0, 1]),
        }

    @staticmethod
    def _consume_dataloader(dataset):
        """Iterate ``dataset.dataloader`` once, unpacking each item as a pair."""
        for _image, _target in tqdm(dataset.dataloader, total=2):
            pass

    def _compressed_config(self, dataset_name):
        """Build a zlib (``complevel`` 4) TensorFlow config named ``dataset_name``."""
        return DataConfig(
            **self.storage_config,
            dataset_name=dataset_name,
            additional={"framework": "tensorflow"},
            compressor={"complevel": 4, "complib": "zlib"},
            attributes=self._image_target_attributes(),
        )

    def _assert_first_sample(self, dataset):
        """Check that ``dataset[0]`` is the first sample of ``_sample_batch``."""
        # ``assertTrue`` instead of a bare ``assert`` so the checks still run
        # when Python is started with ``-O``.
        self.assertTrue(
            tf.reduce_all(
                tf.equal(dataset[0][0],
                         tf.constant([[1, 2], [3, 4]], dtype=tf.uint8))))
        self.assertTrue(
            tf.reduce_all(
                tf.equal(dataset[0][1], tf.constant([0], dtype=tf.uint8))))

    def _load_cache_mapping(self, dataset):
        """Return the JSON content stored at ``dataset.cache_path``.

        Fails the test with an explicit message when the cache file is
        missing, rather than leaving the result unbound.
        """
        cache_path = dataset.cache_path
        self.assertTrue(
            os.path.exists(cache_path),
            "cache file does not exist: {}".format(cache_path),
        )
        with open(cache_path) as f:
            return json.load(f)

    def test_tf_saver(self, data_config=None, save_to_json_file=False):
        """Save the sample batch and disconnect the saver.

        Args:
            data_config: Configuration to save with. When ``None``, a default
                ``test_tf_saver`` configuration is built from
                ``self.storage_config``.
            save_to_json_file: When true, also write the configuration to
                ``data_config_file.json`` and store that path in
                ``self.data_config_file``.
        """
        if data_config is None:
            data_config = DataConfig(
                **self.storage_config,
                dataset_name="test_tf_saver",
                additional={"framework": "tensorflow"},
                attributes=self._image_target_attributes(),
            )
        self.data_config = data_config

        if save_to_json_file:
            self.data_config_file = "data_config_file.json"
            self.data_config.to_json_file(self.data_config_file)

        self.data_saver = DataSaver(config=self.data_config)
        self.data_saver(self._sample_batch())
        self.data_saver.disconnect()

    def test_tf_loader(self):
        """Save with the default config, then iterate the dataloader once."""
        from matorage.tensorflow import Dataset

        self.test_tf_saver()

        self.dataset = Dataset(config=self.data_config)
        self._consume_dataloader(self.dataset)

    def test_tf_loader_with_compressor(self):
        """Save with a zlib-compressed config, then iterate the dataloader."""
        from matorage.tensorflow import Dataset

        data_config = self._compressed_config("test_tf_loader_with_compressor")
        self.test_tf_saver(data_config=data_config)

        self.dataset = Dataset(config=data_config)
        self._consume_dataloader(self.dataset)

    def test_tf_index(self):
        """Check indexed access to the first sample after a save/load cycle."""
        from matorage.tensorflow import Dataset

        self.test_tf_loader()

        dataset = Dataset(config=self.data_config, index=True)
        self._assert_first_sample(dataset)

    def test_tf_index_with_compressor(self):
        """Check indexed access to the first sample of a zlib-compressed save."""
        from matorage.tensorflow import Dataset

        data_config = self._compressed_config("test_tf_index_with_compressor")
        self.test_tf_saver(data_config=data_config)

        # ``test_tf_saver`` stored ``data_config`` on ``self.data_config``.
        dataset = Dataset(config=self.data_config, index=True)
        self._assert_first_sample(dataset)

    def test_saver_from_json_file(self):
        """Save again using a config reloaded from its JSON file."""
        self.test_tf_saver(save_to_json_file=True)

        # Drop the in-memory objects before rebuilding them from the file.
        self.data_config = None
        self.data_saver = None

        self.data_config = DataConfig.from_json_file(self.data_config_file)

        self.data_saver = DataSaver(config=self.data_config)
        self.data_saver(self._sample_batch())
        self.data_saver.disconnect()

    def test_loader_from_json_file(self):
        """Iterate a dataset built from a config reloaded from its JSON file."""
        from matorage.tensorflow import Dataset

        self.test_tf_saver(save_to_json_file=True)

        # Drop the in-memory config before rebuilding it from the file.
        self.data_config = None

        self.data_config = DataConfig.from_json_file(self.data_config_file)

        self.dataset = Dataset(config=self.data_config)
        self._consume_dataloader(self.dataset)

    def test_tf_not_clear(self):
        """Check the cache file content is unchanged with ``clear=False``."""
        from matorage.tensorflow import Dataset

        self.test_tf_loader()

        # Read the first dataset's cache before the second dataset is built.
        _pre_file_mapper = self._load_cache_mapping(self.dataset)

        self.dataset = Dataset(config=self.data_config, clear=False)

        _next_file_mapper = self._load_cache_mapping(self.dataset)

        self.assertEqual(_pre_file_mapper, _next_file_mapper)
Exemple #2
0
class DataSaverTest(DataTest, unittest.TestCase):
    """Construction tests for ``DataConfig`` and save tests for ``DataSaver``.

    Every ``test_datasaver_*`` test disconnects its saver after saving, the
    same way ``TFDataTest.test_tf_saver`` does.
    """

    @staticmethod
    def _int_batch():
        """Return a (3, 2) integer batch."""
        return np.asarray([[1, 2], [3, 4], [5, 6]])

    @staticmethod
    def _float_batch():
        """Return a (3, 2) floating-point batch."""
        return np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

    def _save_and_disconnect(self, x, **saver_kwargs):
        """Save ``x`` under attribute ``"x"`` with a new saver, then disconnect.

        Args:
            x: Array expected to have shape ``(3, 2)``.
            **saver_kwargs: Extra keyword arguments forwarded to ``DataSaver``.
        """
        self.data_saver = DataSaver(config=self.data_config, **saver_kwargs)
        self.assertEqual(x.shape, (3, 2))
        self.data_saver({"x": x})
        # Previously the saver was never disconnected after saving.
        self.data_saver.disconnect()

    def _save_with_every_complevel(self, dataset_name, complib):
        """Save the float batch once per ``complevel`` 0..9 using ``complib``."""
        for level in range(10):
            self.data_config = DataConfig(
                **self.storage_config,
                dataset_name=dataset_name,
                attributes=[DataAttribute("x", "float64", (2), itemsize=32)],
                compressor={"complevel": level, "complib": complib},
            )
            self._save_and_disconnect(self._float_batch())

    def test_dataconfig_one_attribute(self):
        """Build a config from a single bare ``DataAttribute``."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_one_attribute",
            attributes=DataAttribute("x", "uint8", (1)),
        )

    def test_dataconfig_one_attribute_with_tuple_attributes(self):
        """Build a config from a single attribute given as a plain tuple."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_one_attribute_with_tuple_attributes",
            attributes=("x", "uint8", (1)),
        )

    def test_reload_dataconfig(self):
        """Write a config to a JSON file and load it back."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_reload_dataconfig",
            attributes=DataAttribute("x", "uint8", (1)),
        )
        self.data_config_file = "data_config_file.json"
        self.data_config.to_json_file(self.data_config_file)

        # Drop the in-memory config before rebuilding it from the file.
        self.data_config = None

        self.data_config = DataConfig.from_json_file(self.data_config_file)

    def test_dataconfig_two_attributes(self):
        """Build a config from a list of two ``DataAttribute`` objects."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_two_attributes",
            attributes=[
                DataAttribute("x", "uint8", (1)),
                DataAttribute("y", "uint8", (1)),
            ],
        )

    def test_dataconfig_two_attribute_with_tuple_attributes(self):
        """Build a config from a list of two attributes given as tuples."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_two_attribute_with_tuple_attributes",
            attributes=[("x", "uint8", (1)), ("y", "uint8", (1))],
        )

    def test_dataconfig_attributes_already_exist(self):
        """Expect a ``KeyError`` when two attributes share the name ``x``."""
        with self.assertRaisesRegex(KeyError, "is already exist in"):
            self.data_config = DataConfig(
                **self.storage_config,
                dataset_name="test_dataconfig_attributes_already_exist",
                attributes=[
                    DataAttribute("x", "uint8", (1)),
                    DataAttribute("x", "uint8", (1)),
                ],
            )

    def test_dataconfig_string_attribute(self):
        """Build a config with one ``string`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_string_attribute",
            attributes=[DataAttribute("x", "string", (1), itemsize=32)],
        )

    def test_dataconfig_bool_attribute(self):
        """Build a config with one ``bool`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_bool_attribute",
            attributes=[DataAttribute("x", "bool", (1))],
        )

    def test_dataconfig_int8_attribute(self):
        """Build a config with one ``int8`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_int8_attribute",
            attributes=[DataAttribute("x", "int8", (1))],
        )

    def test_dataconfig_int16_attribute(self):
        """Build a config with one ``int16`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_int16_attribute",
            attributes=[DataAttribute("x", "int16", (1))],
        )

    def test_dataconfig_int32_attribute(self):
        """Build a config with one ``int32`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_int32_attribute",
            attributes=[DataAttribute("x", "int32", (1))],
        )

    def test_dataconfig_uint8_attribute(self):
        """Build a config with one ``uint8`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_uint8_attribute",
            attributes=[DataAttribute("x", "uint8", (1))],
        )

    def test_dataconfig_uint16_attribute(self):
        """Build a config with one ``uint16`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_uint16_attribute",
            attributes=[DataAttribute("x", "uint16", (1))],
        )

    def test_dataconfig_uint32_attribute(self):
        """Build a config with one ``uint32`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_uint32_attribute",
            attributes=[DataAttribute("x", "uint32", (1))],
        )

    def test_dataconfig_uint64_attribute(self):
        """Build a config with one ``uint64`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_uint64_attribute",
            attributes=[DataAttribute("x", "uint64", (1))],
        )

    def test_dataconfig_float32_attribute(self):
        """Build a config with one ``float32`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_float32_attribute",
            attributes=[DataAttribute("x", "float32", (1))],
        )

    def test_dataconfig_float64_attribute(self):
        """Build a config with one ``float64`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_float64_attribute",
            attributes=[DataAttribute("x", "float64", (1))],
        )

    def test_datasaver_string_attribute(self):
        """Save a (3, 2) batch of strings into a ``string`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_string_attribute",
            attributes=[DataAttribute("x", "string", (2), itemsize=32)],
        )
        self._save_and_disconnect(
            np.asarray([["a", "b"], ["c", "d"], ["e", "f"]]))

    def test_datasaver_bool_attribute(self):
        """Save a (3, 2) batch of booleans into a ``bool`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_bool_attribute",
            attributes=[DataAttribute("x", "bool", (2), itemsize=32)],
        )
        self._save_and_disconnect(
            np.asarray([[True, False], [False, True], [True, True]]))

    def test_datasaver_int8_attribute(self):
        """Save the integer batch into an ``int8`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_int8_attribute",
            attributes=[DataAttribute("x", "int8", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._int_batch())

    def test_datasaver_int16_attribute(self):
        """Save the integer batch into an ``int16`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_int16_attribute",
            attributes=[DataAttribute("x", "int16", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._int_batch())

    def test_datasaver_int32_attribute(self):
        """Save the integer batch into an ``int32`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_int32_attribute",
            attributes=[DataAttribute("x", "int32", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._int_batch())

    def test_datasaver_int64_attribute(self):
        """Save the integer batch into an ``int64`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_int64_attribute",
            attributes=[DataAttribute("x", "int64", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._int_batch())

    def test_datasaver_uint8_attribute(self):
        """Save the integer batch into a ``uint8`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_uint8_attribute",
            attributes=[DataAttribute("x", "uint8", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._int_batch())

    def test_datasaver_uint16_attribute(self):
        """Save the integer batch into a ``uint16`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_uint16_attribute",
            attributes=[DataAttribute("x", "uint16", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._int_batch())

    def test_datasaver_uint32_attribute(self):
        """Save the integer batch into a ``uint32`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_uint32_attribute",
            attributes=[DataAttribute("x", "uint32", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._int_batch())

    def test_datasaver_uint64_attribute(self):
        """Save the integer batch into a ``uint64`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_uint64_attribute",
            attributes=[DataAttribute("x", "uint64", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._int_batch())

    def test_datasaver_float32_attribute(self):
        """Save the float batch into a ``float32`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_float32_attribute",
            attributes=[DataAttribute("x", "float32", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._float_batch())

    def test_datasaver_float64_attribute(self):
        """Save the float batch into a ``float64`` attribute."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_float64_attribute",
            attributes=[DataAttribute("x", "float64", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._float_batch())

    def test_datasaver_inmemory(self):
        """Save the float batch with a saver created with ``inmemory=True``."""
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_inmemory",
            attributes=[DataAttribute("x", "float64", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._float_batch(), inmemory=True)

    def test_datasaver_zlib(self):
        """Save once for every ``zlib`` compression level 0..9."""
        self._save_with_every_complevel("test_datasaver_zlib", "zlib")

    def test_datasaver_lzo(self):
        """Save once for every ``lzo`` compression level 0..9."""
        self._save_with_every_complevel("test_datasaver_lzo", "lzo")

    def test_datasaver_bzip2(self):
        """Save once for every ``bzip2`` compression level 0..9."""
        self._save_with_every_complevel("test_datasaver_bzip2", "bzip2")

    def test_datasaver_blosc(self):
        """Save once for every ``blosc`` compression level 0..9."""
        self._save_with_every_complevel("test_datasaver_blosc", "blosc")

    def test_datasaver_nas(self):
        """Save the float batch using ``endpoint="/tmp"`` as the only storage setting."""
        # This config does not use ``self.storage_config``.
        self.data_config = DataConfig(
            endpoint="/tmp",
            dataset_name="test_datasaver_nas",
            attributes=[DataAttribute("x", "float64", (2), itemsize=32)],
        )
        self._save_and_disconnect(self._float_batch())
Exemple #3
0
class DataSaverTest(DataTest, unittest.TestCase):
    def test_dataconfig_one_attribute(self):
        """Build a ``DataConfig`` from a single bare ``DataAttribute``."""
        lone_attribute = DataAttribute("x", "uint8", (1))
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_one_attribute",
            attributes=lone_attribute,
        )

    def test_dataconfig_one_attribute_with_tuple_attributes(self):
        """Build a ``DataConfig`` from one attribute given as a plain tuple."""
        attribute_spec = ("x", "uint8", (1))
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_one_attribute_with_tuple_attributes",
            attributes=attribute_spec,
        )

    def test_reload_dataconfig(self):
        """Write a ``DataConfig`` to a JSON file and load it back."""
        lone_attribute = DataAttribute("x", "uint8", (1))
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_reload_dataconfig",
            attributes=lone_attribute,
        )

        json_path = "data_config_file.json"
        self.data_config_file = json_path
        self.data_config.to_json_file(self.data_config_file)

        # Drop the in-memory config before rebuilding it from the file.
        self.data_config = None
        self.data_config = DataConfig.from_json_file(self.data_config_file)

    def test_dataconfig_two_attributes(self):
        """Build a ``DataConfig`` from a list of two ``DataAttribute`` objects."""
        attribute_pair = [
            DataAttribute(name, "uint8", (1)) for name in ("x", "y")
        ]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_two_attributes",
            attributes=attribute_pair,
        )

    def test_dataconfig_two_attribute_with_tuple_attributes(self):
        """Build a ``DataConfig`` from a list of two attributes given as tuples."""
        attribute_specs = [
            ("x", "uint8", (1)),
            ("y", "uint8", (1)),
        ]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_two_attribute_with_tuple_attributes",
            attributes=attribute_specs,
        )

    def test_dataconfig_attributes_already_exist(self):
        """Expect a ``KeyError`` when two attributes share the name ``x``."""
        expected_message = "is already exist in"
        with self.assertRaisesRegex(KeyError, expected_message):
            # The attributes are built inside the ``with`` block so anything
            # they raise is still seen by the assertion.
            duplicated = [
                DataAttribute("x", "uint8", (1)),
                DataAttribute("x", "uint8", (1)),
            ]
            self.data_config = DataConfig(
                **self.storage_config,
                dataset_name="test_dataconfig_attributes_already_exist",
                attributes=duplicated,
            )

    def test_dataconfig_string_attribute(self):
        """Build a ``DataConfig`` with one ``string`` attribute."""
        attributes = [DataAttribute("x", "string", (1), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_string_attribute",
            attributes=attributes,
        )

    def test_dataconfig_bool_attribute(self):
        """Build a ``DataConfig`` with one ``bool`` attribute."""
        attributes = [DataAttribute("x", "bool", (1))]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_bool_attribute",
            attributes=attributes,
        )

    def test_dataconfig_int8_attribute(self):
        """Build a ``DataConfig`` with one ``int8`` attribute."""
        attributes = [DataAttribute("x", "int8", (1))]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_int8_attribute",
            attributes=attributes,
        )

    def test_dataconfig_int16_attribute(self):
        """Build a ``DataConfig`` with one ``int16`` attribute."""
        attributes = [DataAttribute("x", "int16", (1))]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_int16_attribute",
            attributes=attributes,
        )

    def test_dataconfig_int32_attribute(self):
        """Build a ``DataConfig`` with one ``int32`` attribute."""
        attributes = [DataAttribute("x", "int32", (1))]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_int32_attribute",
            attributes=attributes,
        )

    def test_dataconfig_uint8_attribute(self):
        """Build a ``DataConfig`` with one ``uint8`` attribute."""
        attributes = [DataAttribute("x", "uint8", (1))]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_uint8_attribute",
            attributes=attributes,
        )

    def test_dataconfig_uint16_attribute(self):
        """Build a ``DataConfig`` with one ``uint16`` attribute."""
        attributes = [DataAttribute("x", "uint16", (1))]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_uint16_attribute",
            attributes=attributes,
        )

    def test_dataconfig_uint32_attribute(self):
        """Build a ``DataConfig`` with one ``uint32`` attribute."""
        attributes = [DataAttribute("x", "uint32", (1))]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_uint32_attribute",
            attributes=attributes,
        )

    def test_dataconfig_uint64_attribute(self):
        """Build a ``DataConfig`` with one ``uint64`` attribute."""
        attributes = [DataAttribute("x", "uint64", (1))]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_uint64_attribute",
            attributes=attributes,
        )

    def test_dataconfig_float32_attribute(self):
        """Build a ``DataConfig`` with one ``float32`` attribute."""
        attributes = [DataAttribute("x", "float32", (1))]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_float32_attribute",
            attributes=attributes,
        )

    def test_dataconfig_float64_attribute(self):
        """Build a ``DataConfig`` with one ``float64`` attribute."""
        attributes = [DataAttribute("x", "float64", (1))]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_dataconfig_float64_attribute",
            attributes=attributes,
        )

    def test_datasaver_string_attribute(self):
        """Save a (3, 2) batch of strings into a ``string`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "string", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_string_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([["a", "b"], ["c", "d"], ["e", "f"]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_bool_attribute(self):
        """Save a (3, 2) batch of booleans into a ``bool`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "bool", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_bool_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([[True, False], [False, True], [True, True]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_int8_attribute(self):
        """Save a (3, 2) integer batch into an ``int8`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "int8", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_int8_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([[1, 2], [3, 4], [5, 6]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_int16_attribute(self):
        """Save a (3, 2) integer batch into an ``int16`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "int16", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_int16_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([[1, 2], [3, 4], [5, 6]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_int32_attribute(self):
        """Save a (3, 2) integer batch into an ``int32`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "int32", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_int32_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([[1, 2], [3, 4], [5, 6]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_int64_attribute(self):
        """Save a (3, 2) integer batch into an ``int64`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "int64", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_int64_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([[1, 2], [3, 4], [5, 6]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_uint8_attribute(self):
        """Save a (3, 2) integer batch into a ``uint8`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "uint8", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_uint8_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([[1, 2], [3, 4], [5, 6]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_uint16_attribute(self):
        """Save a (3, 2) integer batch into a ``uint16`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "uint16", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_uint16_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([[1, 2], [3, 4], [5, 6]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_uint32_attribute(self):
        """Save a (3, 2) integer batch into a ``uint32`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "uint32", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_uint32_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([[1, 2], [3, 4], [5, 6]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_uint64_attribute(self):
        """Save a (3, 2) integer batch into a ``uint64`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "uint64", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_uint64_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([[1, 2], [3, 4], [5, 6]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_float32_attribute(self):
        """Save a (3, 2) float batch into a ``float32`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "float32", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_float32_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_float64_attribute(self):
        """Save a (3, 2) float batch into a ``float64`` attribute, then disconnect."""
        attributes = [DataAttribute("x", "float64", (2), itemsize=32)]
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_float64_attribute",
            attributes=attributes,
        )
        saver = DataSaver(config=self.data_config)
        self.data_saver = saver

        batch = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        self.assertEqual(batch.shape, (3, 2))
        saver({"x": batch})
        saver.disconnect()

    def test_datasaver_inmemory(self):
        """Save a (3, 2) float array through a saver built with ``inmemory=True``."""
        x_spec = DataAttribute("x", "float64", (2), itemsize=32)
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_inmemory",
            attributes=[x_spec],
        )
        # Only difference from the plain float64 test: the inmemory flag.
        self.data_saver = DataSaver(config=self.data_config, inmemory=True)

        values = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        self.assertEqual(values.shape, (3, 2))
        self.data_saver({"x": values})
        self.data_saver.disconnect()

    def test_datasaver_zlib(self):
        """Save the same float batch once for every zlib "complevel" in 0..9."""
        for complevel in range(10):
            zlib_options = {"complevel": complevel, "complib": "zlib"}
            self.data_config = DataConfig(
                **self.storage_config,
                dataset_name="test_datasaver_zlib",
                attributes=[DataAttribute("x", "float64", (2), itemsize=32)],
                compressor=zlib_options,
            )
            # A fresh saver is created and disconnected for each level.
            self.data_saver = DataSaver(config=self.data_config)

            values = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
            self.assertEqual(values.shape, (3, 2))
            self.data_saver({"x": values})
            self.data_saver.disconnect()

    def test_datasaver_lzo(self):
        """Save the same float batch once for every lzo "complevel" in 0..9."""
        for complevel in range(10):
            lzo_options = {"complevel": complevel, "complib": "lzo"}
            self.data_config = DataConfig(
                **self.storage_config,
                dataset_name="test_datasaver_lzo",
                attributes=[DataAttribute("x", "float64", (2), itemsize=32)],
                compressor=lzo_options,
            )
            # A fresh saver is created and disconnected for each level.
            self.data_saver = DataSaver(config=self.data_config)

            values = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
            self.assertEqual(values.shape, (3, 2))
            self.data_saver({"x": values})
            self.data_saver.disconnect()

    def test_datasaver_bzip2(self):
        """Save the same float batch once for every bzip2 "complevel" in 0..9."""
        for complevel in range(10):
            bzip2_options = {"complevel": complevel, "complib": "bzip2"}
            self.data_config = DataConfig(
                **self.storage_config,
                dataset_name="test_datasaver_bzip2",
                attributes=[DataAttribute("x", "float64", (2), itemsize=32)],
                compressor=bzip2_options,
            )
            # A fresh saver is created and disconnected for each level.
            self.data_saver = DataSaver(config=self.data_config)

            values = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
            self.assertEqual(values.shape, (3, 2))
            self.data_saver({"x": values})
            self.data_saver.disconnect()

    def test_datasaver_blosc(self):
        """Save the same float batch once for every blosc "complevel" in 0..9."""
        for complevel in range(10):
            blosc_options = {"complevel": complevel, "complib": "blosc"}
            self.data_config = DataConfig(
                **self.storage_config,
                dataset_name="test_datasaver_blosc",
                attributes=[DataAttribute("x", "float64", (2), itemsize=32)],
                compressor=blosc_options,
            )
            # A fresh saver is created and disconnected for each level.
            self.data_saver = DataSaver(config=self.data_config)

            values = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
            self.assertEqual(values.shape, (3, 2))
            self.data_saver({"x": values})
            self.data_saver.disconnect()

    def test_datasaver_nas(self):
        """Save a (3, 2) float array with a config whose endpoint is "/tmp".

        Unlike the sibling tests, ``self.storage_config`` is not used here; the
        config is given only an ``endpoint`` (presumably selecting a
        filesystem/NAS backend; confirm against DataConfig).
        """
        x_spec = DataAttribute("x", "float64", (2), itemsize=32)
        self.data_config = DataConfig(
            endpoint="/tmp",
            dataset_name="test_datasaver_nas",
            attributes=[x_spec],
        )
        self.data_saver = DataSaver(config=self.data_config)

        values = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        self.assertEqual(values.shape, (3, 2))
        self.data_saver({"x": values})
        self.data_saver.disconnect()

    def test_datasaver_refresh(self):
        """Save the same batch twice on one config: ``refresh=False`` then ``True``."""
        x_spec = DataAttribute("x", "float64", (2), itemsize=32)
        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_refresh",
            attributes=[x_spec],
        )

        # Order matters: the non-refreshing save runs first, the refreshing one second.
        for refresh_flag in (False, True):
            self.data_saver = DataSaver(config=self.data_config, refresh=refresh_flag)

            values = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
            self.assertEqual(values.shape, (3, 2))
            self.data_saver({"x": values})
            self.data_saver.disconnect()


    def test_datasaver_filetype(self):
        """Save an array and a text file, then read the file back via ``Dataset``.

        Steps: save a (3, 2) float array under "x", save "test.txt" under the
        key "file" with ``filetype=True``, disconnect, then check that a fresh
        ``Dataset`` lists exactly ["file"] and that the path returned for that
        key contains the text that was written.
        """
        from matorage.torch import Dataset

        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_filetype",
            attributes=[DataAttribute("x", "float64", (2), itemsize=32)],
        )
        self.data_saver = DataSaver(config=self.data_config)
        x = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        self.assertEqual(x.shape, (3, 2))
        self.data_saver({"x": x})

        # Finish writing and close the file BEFORE its path is given to the
        # saver: the buffered text is then guaranteed to be on disk, and the
        # handle is released even if the saver call raises.
        with open("test.txt", "w") as _file:
            _file.write('this is test')
        self.data_saver({"file": "test.txt"}, filetype=True)

        self.data_saver.disconnect()

        self.dataset = Dataset(config=self.data_config)
        self.assertEqual(
            self.dataset.get_filetype_list, ["file"]
        )
        _local_filepath = self.dataset.get_filetype_from_key("file")
        with open(_local_filepath, 'r') as f:
            self.assertEqual(f.read(), 'this is test')
class TorchDataTest(DataTest, unittest.TestCase):
    """Exercise ``DataSaver`` together with the matorage PyTorch ``Dataset``.

    The tests read ``self.storage_config``, ``self.nas_config`` and
    ``self.cache_folder_path``. None of them is defined in this class, so they
    presumably come from ``DataTest`` (confirm against that class).
    """

    def _torch_sample_attributes(self):
        """Return new "image" (2, 2) and "target" (1) uint8 attributes."""
        return [
            DataAttribute("image", "uint8", (2, 2), itemsize=32),
            DataAttribute("target", "uint8", (1), itemsize=32),
        ]

    def _torch_save_samples(self):
        """Open a saver on ``self.data_config``, save two samples, disconnect."""
        self.data_saver = DataSaver(config=self.data_config)
        self.data_saver({
            "image": np.asarray([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]),
            "target": np.asarray([0, 1]),
        })
        self.data_saver.disconnect()

    def _torch_iterate_dataset(self):
        """Iterate ``self.dataset`` once through a shuffled ``DataLoader``."""
        loader = DataLoader(self.dataset,
                            batch_size=64,
                            num_workers=8,
                            shuffle=True)
        # Unpacking checks that every batch is an (image, target) pair.
        for _image, _target in tqdm(loader):
            pass

    def test_torch_saver(self, data_config=None, save_to_json_file=False):
        """Save two image/target samples.

        Args:
            data_config: Config to save with. When ``None``, a default
                "test_torch_saver" config is built from ``self.storage_config``.
            save_to_json_file: When true, also write the config to
                "data_config_file.json" and keep that path in
                ``self.data_config_file``.
        """
        if data_config is None:
            self.data_config = DataConfig(
                **self.storage_config,
                dataset_name="test_torch_saver",
                additional={"framework": "pytorch"},
                attributes=self._torch_sample_attributes(),
            )
        else:
            self.data_config = data_config

        if save_to_json_file:
            self.data_config_file = "data_config_file.json"
            self.data_config.to_json_file(self.data_config_file)

        self._torch_save_samples()

    def test_torch_loader(self):
        """Save the default samples, then iterate them through a DataLoader."""
        from matorage.torch import Dataset

        self.test_torch_saver()

        self.dataset = Dataset(config=self.data_config,
                               cache_folder_path=self.cache_folder_path)
        self._torch_iterate_dataset()

    def test_torch_loader_with_compressor(self):
        """Same as ``test_torch_loader`` but with a zlib level-4 compressor config."""
        from matorage.torch import Dataset

        data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_torch_loader_with_compressor",
            additional={"framework": "pytorch"},
            compressor={
                "complevel": 4,
                "complib": "zlib"
            },
            attributes=self._torch_sample_attributes(),
        )

        self.test_torch_saver(data_config=data_config)

        self.dataset = Dataset(config=data_config,
                               cache_folder_path=self.cache_folder_path)
        self._torch_iterate_dataset()

    def test_torch_index(self):
        """Check that ``dataset[0]`` returns the first saved image and target."""
        from matorage.torch import Dataset

        self.test_torch_saver()

        dataset = Dataset(config=self.data_config,
                          index=True,
                          cache_folder_path=self.cache_folder_path)

        # unittest assertions instead of bare ``assert`` so the checks still
        # run when Python is started with -O.
        self.assertTrue(
            torch.equal(dataset[0][0],
                        torch.tensor([[1, 2], [3, 4]], dtype=torch.uint8)))
        self.assertTrue(
            torch.equal(dataset[0][1], torch.tensor([0], dtype=torch.uint8)))

    def test_torch_index_with_compressor(self):
        """Same as ``test_torch_index`` but with a zlib level-4 compressor config."""
        from matorage.torch import Dataset

        data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_torch_index_with_compressor",
            additional={"framework": "pytorch"},
            compressor={
                "complevel": 4,
                "complib": "zlib"
            },
            attributes=self._torch_sample_attributes(),
        )

        self.test_torch_saver(data_config=data_config)

        # test_torch_saver stored this same object in self.data_config; the
        # local name is used for consistency with the loader test above.
        dataset = Dataset(config=data_config,
                          index=True,
                          cache_folder_path=self.cache_folder_path)

        self.assertTrue(
            torch.equal(dataset[0][0],
                        torch.tensor([[1, 2], [3, 4]], dtype=torch.uint8)))
        self.assertTrue(
            torch.equal(dataset[0][1], torch.tensor([0], dtype=torch.uint8)))

    def test_saver_from_json_file(self):
        """Save once, reload the config from its JSON dump, and save again."""
        self.test_torch_saver(save_to_json_file=True)

        # Drop the in-memory objects so the config used below can only have
        # come from the JSON file.
        self.data_config = None
        self.data_saver = None

        self.data_config = DataConfig.from_json_file(self.data_config_file)

        self._torch_save_samples()

    def test_loader_from_json_file(self):
        """Save once, reload the config from its JSON dump, and iterate the data."""
        from matorage.torch import Dataset

        self.test_torch_saver(save_to_json_file=True)

        # Drop the in-memory config so the one used below comes from the file.
        self.data_config = None

        self.data_config = DataConfig.from_json_file(self.data_config_file)

        self.dataset = Dataset(config=self.data_config,
                               cache_folder_path=self.cache_folder_path)
        self._torch_iterate_dataset()

    def test_torch_not_clear(self):
        """Check the cache mapping file is unchanged by a ``clear=False`` Dataset."""
        from matorage.torch import Dataset

        self.test_torch_loader()

        # Fail with a clear assertion (not a NameError further down) when the
        # cache file is missing.
        self.assertTrue(os.path.exists(self.dataset.cache_path),
                        "cache file missing after the first load")
        with open(self.dataset.cache_path) as f:
            _pre_file_mapper = json.load(f)

        self.dataset = Dataset(config=self.data_config,
                               clear=False,
                               cache_folder_path=self.cache_folder_path)

        self.assertTrue(os.path.exists(self.dataset.cache_path),
                        "cache file missing after reopening with clear=False")
        with open(self.dataset.cache_path) as f:
            _next_file_mapper = json.load(f)

        self.assertEqual(_pre_file_mapper, _next_file_mapper)

    def test_datasaver_filetype(self):
        """Save an array and a text file, then read the file back via ``Dataset``."""
        from matorage.torch import Dataset

        self.data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_datasaver_filetype",
            attributes=[DataAttribute("x", "float64", (2), itemsize=32)],
        )
        self.data_saver = DataSaver(config=self.data_config)
        x = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        self.assertEqual(x.shape, (3, 2))
        self.data_saver({"x": x})

        # Finish writing and close the file BEFORE its path is given to the
        # saver: the buffered text is then guaranteed to be on disk, and the
        # handle is released even if the saver call raises.
        with open("test.txt", "w") as _file:
            _file.write('this is test')
        self.data_saver({"file": "test.txt"}, filetype=True)

        self.data_saver.disconnect()

        self.dataset = Dataset(config=self.data_config,
                               cache_folder_path=self.cache_folder_path)
        self.assertEqual(self.dataset.get_filetype_list, ["file"])
        _local_filepath = self.dataset.get_filetype_from_key("file")
        with open(_local_filepath, 'r') as f:
            self.assertEqual(f.read(), 'this is test')

    def test_torch_saver_nas(self):
        """Save two image/target samples using ``self.nas_config``."""
        self.data_config = DataConfig(
            **self.nas_config,
            dataset_name="test_torch_saver_nas",
            additional={"framework": "pytorch"},
            attributes=self._torch_sample_attributes(),
        )

        self._torch_save_samples()

    def test_torch_loader_nas(self):
        """Save via ``self.nas_config``, then iterate the data through a DataLoader."""
        from matorage.torch import Dataset

        self.test_torch_saver_nas()

        self.dataset = Dataset(config=self.data_config,
                               cache_folder_path=self.cache_folder_path)
        self._torch_iterate_dataset()
Exemple #5
0
class TorchDataTest(DataTest, unittest.TestCase):
    def test_torch_saver(self, data_config=None, save_to_json_file=False):
        """Save two image/target samples with a ``DataSaver``.

        Uses ``data_config`` when given; otherwise builds the default
        "test_torch_saver" config from ``self.storage_config``. With
        ``save_to_json_file`` set, the config is also written to
        "data_config_file.json" and that path kept in ``self.data_config_file``.
        """
        if data_config is not None:
            self.data_config = data_config
        else:
            image_attr = DataAttribute("image", "uint8", (2, 2), itemsize=32)
            target_attr = DataAttribute("target", "uint8", (1), itemsize=32)
            self.data_config = DataConfig(
                **self.storage_config,
                dataset_name="test_torch_saver",
                additional={"framework": "pytorch"},
                attributes=[image_attr, target_attr],
            )

        if save_to_json_file:
            json_path = "data_config_file.json"
            self.data_config_file = json_path
            self.data_config.to_json_file(json_path)

        self.data_saver = DataSaver(config=self.data_config)

        images = np.asarray([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
        targets = np.asarray([0, 1])
        self.data_saver({"image": images, "target": targets})
        self.data_saver.disconnect()

    def test_torch_loader(self):
        """Save the default samples, then iterate them once via a DataLoader."""
        from matorage.torch import Dataset

        self.test_torch_saver()
        self.dataset = Dataset(config=self.data_config)

        loader_options = {"batch_size": 64, "num_workers": 8, "shuffle": True}
        batches = DataLoader(self.dataset, **loader_options)

        # Unpacking each item verifies the batch is an (image, target) pair.
        for _step, (_image, _target) in enumerate(tqdm(batches)):
            continue

    def test_torch_loader_with_compressor(self):
        """Save with a zlib level-4 compressor config, then iterate the data."""
        from matorage.torch import Dataset

        zlib_options = {"complevel": 4, "complib": "zlib"}
        image_attr = DataAttribute("image", "uint8", (2, 2), itemsize=32)
        target_attr = DataAttribute("target", "uint8", (1), itemsize=32)
        data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_torch_loader_with_compressor",
            additional={"framework": "pytorch"},
            compressor=zlib_options,
            attributes=[image_attr, target_attr],
        )

        self.test_torch_saver(data_config=data_config)
        self.dataset = Dataset(config=data_config)

        loader_options = {"batch_size": 64, "num_workers": 8, "shuffle": True}
        batches = DataLoader(self.dataset, **loader_options)

        # Unpacking each item verifies the batch is an (image, target) pair.
        for _step, (_image, _target) in enumerate(tqdm(batches)):
            continue

    def test_torch_index(self):
        """Check that ``dataset[0]`` yields the first saved image and target."""
        from matorage.torch import Dataset

        self.test_torch_saver()

        indexed = Dataset(config=self.data_config, index=True)

        expected_image = torch.tensor([[1, 2], [3, 4]], dtype=torch.uint8)
        assert torch.equal(indexed[0][0], expected_image)

        expected_target = torch.tensor([0], dtype=torch.uint8)
        assert torch.equal(indexed[0][1], expected_target)

    def test_torch_index_with_compressor(self):
        """Index into data saved with a zlib level-4 compressor config."""
        from matorage.torch import Dataset

        zlib_options = {"complevel": 4, "complib": "zlib"}
        image_attr = DataAttribute("image", "uint8", (2, 2), itemsize=32)
        target_attr = DataAttribute("target", "uint8", (1), itemsize=32)
        data_config = DataConfig(
            **self.storage_config,
            dataset_name="test_torch_index_with_compressor",
            additional={"framework": "pytorch"},
            compressor=zlib_options,
            attributes=[image_attr, target_attr],
        )

        self.test_torch_saver(data_config=data_config)

        # Reads the config back from self, where test_torch_saver stored it.
        indexed = Dataset(config=self.data_config, index=True)

        expected_image = torch.tensor([[1, 2], [3, 4]], dtype=torch.uint8)
        assert torch.equal(indexed[0][0], expected_image)

        expected_target = torch.tensor([0], dtype=torch.uint8)
        assert torch.equal(indexed[0][1], expected_target)

    def test_saver_from_json_file(self):
        """Save once, reload the config from its JSON dump, and save again."""
        self.test_torch_saver(save_to_json_file=True)

        # Drop the in-memory objects; the config below is rebuilt from the file.
        self.data_config = None
        self.data_saver = None

        json_path = self.data_config_file
        self.data_config = DataConfig.from_json_file(json_path)
        self.data_saver = DataSaver(config=self.data_config)

        images = np.asarray([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
        targets = np.asarray([0, 1])
        self.data_saver({"image": images, "target": targets})
        self.data_saver.disconnect()

    def test_loader_from_json_file(self):
        """Save once, reload the config from its JSON dump, and iterate the data."""
        from matorage.torch import Dataset

        self.test_torch_saver(save_to_json_file=True)

        # Drop the in-memory config; the one used below is rebuilt from the file.
        self.data_config = None

        json_path = self.data_config_file
        self.data_config = DataConfig.from_json_file(json_path)
        self.dataset = Dataset(config=self.data_config)

        loader_options = {"batch_size": 64, "num_workers": 8, "shuffle": True}
        batches = DataLoader(self.dataset, **loader_options)

        # Unpacking each item verifies the batch is an (image, target) pair.
        for _step, (_image, _target) in enumerate(tqdm(batches)):
            continue