def test_serialize_deserialize():
    """Round-trip a nested schema through ``serialize`` and ``deserialize``.

    The flattened paths, shapes and stringified dtypes of the deserialized
    schema must be equal to those of the schema it was produced from.
    """

    def flat_summary(schema):
        # Flatten once, then read the three compared attributes off each entry.
        entries = tuple(schema._flatten())
        return (
            [entry.path for entry in entries],
            [entry.shape for entry in entries],
            [str(entry.dtype) for entry in entries],
        )

    fields = {
        "image": Image(shape=(300, 400, 3), dtype="uint8"),
        "label": Tensor(
            shape=(5000,),
            dtype={
                "first": {
                    "a": "<U20",
                    "b": "uint32",
                    "c": ClassLabel(num_classes=3),
                },
                "second": "float64",
            },
        ),
        "bbox": BBox(dtype="float64"),
        "audio": Audio(shape=(120,), dtype="uint32"),
        "mask": Mask(shape=(5, 8, 1)),
        "polygon": Polygon(shape=(16, 2)),
        "segmentation1": Segmentation(
            shape=(5, 9, 1), dtype="uint8", num_classes=5
        ),
        "segmentation2": Segmentation(
            shape=(5, 9, 1),
            dtype="uint8",
            names=("apple", "orange", "pineapple"),
        ),
        "sequence": Sequence(
            dtype=Tensor(
                shape=(None, None), max_shape=(100, 100), dtype="uint8"
            ),
        ),
        "text": Text((None,), max_shape=(10,)),
        "video": Video((100, 100, 3, 10)),
    }
    schema = Tensor(shape=(100, 200), dtype=fields)

    expected_paths, expected_shapes, expected_dtypes = flat_summary(schema)

    restored = deserialize(serialize(schema))
    paths, shapes, dtypes = flat_summary(restored)

    assert paths == expected_paths
    assert shapes == expected_shapes
    assert dtypes == expected_dtypes
def deserialize(inp):
    """Rebuild a schema object from its serialized, dictionary-based form.

    A dictionary is dispatched on its ``"type"`` entry and converted back into
    the schema class of the same name; nested ``"dtype"``, ``"items"`` and
    ``"class_labels"`` entries are deserialized recursively.

    Args:
        inp: A serialized schema dictionary, or any other value.

    Returns:
        The reconstructed schema object for a recognized ``"type"``; ``None``
        for a dictionary whose ``"type"`` is not recognized; ``inp`` itself
        when it is not a dictionary.

    Raises:
        KeyError: If a dictionary lacks ``"type"`` or a field required by the
            selected type.
    """
    # Non-dictionary values (e.g. plain dtype strings) pass through untouched.
    if not isinstance(inp, dict):
        return inp

    kind = inp["type"]

    # The readers below are deliberately lazy: they are called inside each
    # constructor call so that fields are read in the same order as the
    # keyword arguments are listed.
    def read_shape():
        return tuple(inp["shape"])

    def read_max_shape():
        return tuple(inp["max_shape"])

    def read_dtype():
        return deserialize(inp["dtype"])

    def storage_kwargs():
        # ``chunks`` plus whatever ``_get_compressor`` resolves for this entry.
        return {"chunks": inp["chunks"], "compressor": _get_compressor(inp)}

    if kind == "Audio":
        return Audio(
            shape=read_shape(),
            dtype=read_dtype(),
            file_format=inp["file_format"],
            sample_rate=inp["sample_rate"],
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "BBox":
        return BBox(
            shape=read_shape(),
            dtype=read_dtype(),
            **storage_kwargs(),
            max_shape=read_max_shape(),
        )

    if kind == "ClassLabel":
        # Explicit names win; otherwise only the class count is restored.
        by_name = inp["_names"] is not None
        return ClassLabel(
            shape=read_shape(),
            dtype=read_dtype(),
            **(
                {"names": inp["_names"]}
                if by_name
                else {"num_classes": inp["_num_classes"]}
            ),
            **storage_kwargs(),
            max_shape=read_max_shape(),
        )

    if kind in ("SchemaDict", "FeatureDict"):
        return SchemaDict(
            {key: deserialize(value) for key, value in inp["items"].items()}
        )

    if kind == "Image":
        return Image(
            shape=read_shape(),
            dtype=read_dtype(),
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Mask":
        return Mask(
            shape=read_shape(),
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Polygon":
        return Polygon(
            shape=read_shape(),
            max_shape=read_max_shape(),
            dtype=read_dtype(),
            **storage_kwargs(),
        )

    if kind == "Primitive":
        return Primitive(dtype=read_dtype(), **storage_kwargs())

    if kind == "Segmentation":
        labels = deserialize(inp["class_labels"])
        # Mirror the ClassLabel rule: names when present, else the class count.
        by_name = labels._names is not None
        return Segmentation(
            shape=read_shape(),
            dtype=read_dtype(),
            **(
                {"names": labels._names}
                if by_name
                else {"num_classes": labels._num_classes}
            ),
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Sequence":
        return Sequence(
            shape=read_shape(),
            dtype=read_dtype(),
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Tensor":
        return Tensor(
            read_shape(),
            read_dtype(),
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Text":
        return Text(
            read_shape(),
            read_dtype(),
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Video":
        return Video(
            shape=read_shape(),
            dtype=read_dtype(),
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    # Unrecognized type names fall through without producing an object.
    return None
def test_audio_repr():
    """Check the textual representation of an ``Audio`` built from a shape only."""
    audio = Audio((100,))
    expected = "Audio(shape=(100,), dtype='int64')"
    # Use the builtin rather than invoking the dunder method directly.
    assert repr(audio) == expected
def test_audio():
    """Constructing ``Audio((1920, 3), "float32")`` is expected to raise ``ValueError``."""
    # NOTE(review): presumably rejected because of the two-dimensional shape
    # and/or the float dtype -- confirm against the Audio constructor.
    with pytest.raises(ValueError):
        # The constructed object is never used, so it is not bound to a name.
        Audio((1920, 3), "float32")
def deserialize(inp):
    """Turn a serialized schema description back into a schema object.

    Dictionaries are dispatched on their ``"type"`` entry; nested ``"dtype"``,
    ``"items"`` and ``"class_labels"`` entries are deserialized recursively.

    Args:
        inp: A serialized schema dictionary, or any other value.

    Returns:
        The reconstructed schema object for a recognized ``"type"``; ``None``
        for a dictionary whose ``"type"`` is not recognized; ``inp`` itself
        when it is not a dictionary.

    Raises:
        KeyError: If a dictionary lacks ``"type"`` or a field required by the
            selected type.
    """
    # Anything that is not a dictionary is already in its final form.
    if not isinstance(inp, dict):
        return inp

    kind = inp["type"]

    # Lazy field readers, invoked inside each constructor call so the fields
    # are read in the order the keyword arguments are written.
    def read_shape():
        return tuple(inp["shape"])

    def read_max_shape():
        return tuple(inp["max_shape"])

    def read_dtype():
        return deserialize(inp["dtype"])

    def storage_kwargs():
        # ``chunks`` plus whatever ``_get_compressor`` resolves for this entry.
        return {"chunks": inp["chunks"], "compressor": _get_compressor(inp)}

    if kind == "Audio":
        return Audio(
            shape=read_shape(),
            dtype=read_dtype(),
            file_format=inp["file_format"],
            sample_rate=inp["sample_rate"],
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "BBox":
        return BBox(dtype=read_dtype(), **storage_kwargs())

    if kind == "ClassLabel":
        # A stored class count takes precedence over stored names.
        if "_num_classes" in inp:
            return ClassLabel(
                num_classes=inp["_num_classes"], **storage_kwargs()
            )
        return ClassLabel(names=inp["names"], **storage_kwargs())

    if kind in ("SchemaDict", "FeatureDict"):
        return SchemaDict(
            {key: deserialize(value) for key, value in inp["items"].items()}
        )

    if kind == "Image":
        return Image(
            shape=read_shape(),
            dtype=read_dtype(),
            # TODO uncomment back when image encoding will be added
            # encoding_format=inp["encoding_format"],
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Mask":
        return Mask(
            shape=read_shape(),
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Polygon":
        return Polygon(
            shape=read_shape(),
            max_shape=read_max_shape(),
            dtype=read_dtype(),
            **storage_kwargs(),
        )

    if kind == "Segmentation":
        labels = deserialize(inp["class_labels"])
        # Same precedence as ClassLabel: class count first, names otherwise.
        if hasattr(labels, "_num_classes"):
            return Segmentation(
                shape=read_shape(),
                dtype=read_dtype(),
                num_classes=labels._num_classes,
                max_shape=read_max_shape(),
                **storage_kwargs(),
            )
        return Segmentation(
            shape=read_shape(),
            dtype=read_dtype(),
            names=labels.names,
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Sequence":
        return Sequence(
            shape=read_shape(),
            dtype=read_dtype(),
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Tensor":
        return Tensor(
            read_shape(),
            read_dtype(),
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Text":
        return Text(
            read_shape(),
            read_dtype(),
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    if kind == "Video":
        return Video(
            shape=read_shape(),
            dtype=read_dtype(),
            # TODO uncomment back when image encoding will be added
            # encoding_format=inp["encoding_format"],
            max_shape=read_max_shape(),
            **storage_kwargs(),
        )

    # Unrecognized type names fall through without producing an object.
    return None