def dict_to_hub(dic, path=""):
    """Derive a ``SchemaDict`` from a (possibly nested) sample dictionary.

    Every "/" in a key is replaced by "_". Nested dicts are converted
    recursively, with ``path`` extended by "/" plus the normalized key.
    String values become ``Text`` entries; every other value becomes a
    ``Tensor`` entry whose dimensions are all declared as ``None``.

    ``max_dict``, ``torch``, ``Tensor``, ``Text`` and ``SchemaDict`` are
    resolved from outside this function.

    Args:
        dic: Mapping of string keys to sample values or nested dicts.
        path: Path prefix of ``dic`` within the overall sample.

    Returns:
        A ``SchemaDict`` with one entry per key of ``dic``.
    """
    schema = {}
    for raw_key, value in dic.items():
        key = raw_key.replace("/", "_")
        cur_path = path + "/" + key

        if isinstance(value, dict):
            schema[key] = dict_to_hub(value, path=cur_path)
            continue

        # The shape is captured before a torch tensor is turned into numpy.
        value_shape = value.shape if hasattr(value, "shape") else ()
        if isinstance(value, torch.Tensor):
            value = value.numpy()
        shape = tuple(None for _ in value_shape)

        if isinstance(value, str):
            # Text entries always use the fixed "int64" dtype and a
            # (10000,) maximum shape; max_dict is not consulted for them.
            schema[key] = Text(shape=(None,), dtype="int64", max_shape=(10000,))
        else:
            # Use the max shape registered for this path; when that entry is
            # falsy, fall back to 10000 along every dimension.
            max_shape = max_dict[cur_path] or tuple(10000 for _ in value_shape)
            if hasattr(value, "dtype"):
                dtype = value.dtype.name
            else:
                dtype = type(value)
            schema[key] = Tensor(shape=shape, dtype=dtype, max_shape=max_shape)
    return SchemaDict(schema)
def sdict_to_hub(tf_dt, path=""):
    """Convert a feature mapping into a ``SchemaDict``, tracking each entry's path.

    Each key has "/" replaced by "_". The entry's path is ``path`` followed by
    "/" and the normalized key. The value is converted by ``to_hub``, which
    receives the value, the ``max_dict`` entry for that path, and the path.

    ``to_hub``, ``max_dict`` and ``SchemaDict`` are resolved from outside this
    function.

    Args:
        tf_dt: Mapping of string keys to feature descriptions.
        path: Path prefix of ``tf_dt`` within the overall structure.

    Returns:
        A ``SchemaDict`` keyed by the normalized names.
    """
    converted = {}
    for raw_name, feature in tf_dt.items():
        name = raw_name.replace("/", "_")
        cur_path = "/".join((path, name))
        converted[name] = to_hub(feature, max_dict[cur_path], cur_path)
    return SchemaDict(converted)
def fdict_to_hub(tf_dt):
    """Convert a feature mapping into a ``SchemaDict``.

    Each key has "/" replaced by "_". The value is converted by ``to_hub``,
    which receives the value plus the ``max_dict`` and ``bytes_dict`` entries
    stored under the normalized key.

    ``to_hub``, ``max_dict``, ``bytes_dict`` and ``SchemaDict`` are resolved
    from outside this function.

    Args:
        tf_dt: Mapping of string keys to feature descriptions.

    Returns:
        A ``SchemaDict`` keyed by the normalized names.
    """
    converted = {}
    for raw_name, feature in tf_dt.items():
        # Normalize once and reuse the result for the key and both lookups.
        name = raw_name.replace("/", "_")
        converted[name] = to_hub(feature, max_dict[name], bytes_dict[name])
    return SchemaDict(converted)
def dict_to_hub(d):
    """Build a ``SchemaDict`` describing a (possibly nested) sample dictionary.

    Every "/" in a key is replaced by "_". Nested dicts are converted
    recursively. String values become ``Text`` entries with shape ``(None,)``,
    dtype ``"int64"`` and max shape ``(10000,)``. Every other value becomes a
    ``Tensor`` entry with one ``None`` dimension (max 10000) per dimension of
    the value's ``shape`` attribute, or no dimensions when it has none.

    The schema is collected in a fresh dict and ``d`` is left untouched.
    Writing the entries back into ``d`` while iterating over it raised
    ``RuntimeError`` ("dictionary changed size during iteration") as soon as
    a key contained "/", because the renamed key is a new insertion. It also
    overwrote the caller's data with schema objects.

    Args:
        d: Mapping of string keys to sample values or nested dicts.

    Returns:
        A ``SchemaDict`` with one entry per key of ``d``.
    """
    schema = {}
    for k, v in d.items():
        k = k.replace("/", "_")
        if isinstance(v, dict):
            schema[k] = dict_to_hub(v)
            continue

        # Shape is read from the original value, before any numpy conversion.
        value_shape = v.shape if hasattr(v, "shape") else ()
        shape = tuple(None for _ in value_shape)
        max_shape = tuple(10000 for _ in value_shape)
        if isinstance(v, torch.Tensor):
            v = v.numpy()

        if isinstance(v, str):
            schema[k] = Text(shape=(None,), dtype="int64", max_shape=(10000,))
        else:
            # Values without a dtype attribute are described by their Python type.
            dtype = v.dtype.name if hasattr(v, "dtype") else type(v)
            schema[k] = Tensor(shape=shape, dtype=dtype, max_shape=max_shape)
    return SchemaDict(schema)
def test_feature_dict_repr():
    """The repr of a SchemaDict built from ``int`` and ``str`` lists 'int64' and '<U0'."""
    schema = SchemaDict({"myint": int, "mystr": str})
    assert "SchemaDict({'myint': 'int64', 'mystr': '<U0'})" == repr(schema)
def dict_to_hub(tf_dt):
    """Convert a feature mapping into a ``SchemaDict``.

    Each key has "/" replaced by "_" and each value is converted with
    ``tf_to_hub``. ``tf_to_hub`` and ``SchemaDict`` are resolved from outside
    this function.

    Args:
        tf_dt: Mapping of string keys to feature descriptions.

    Returns:
        A ``SchemaDict`` keyed by the normalized names.
    """
    converted = {}
    for raw_name, feature in tf_dt.items():
        # Normalize the key before converting the value.
        name = raw_name.replace("/", "_")
        converted[name] = tf_to_hub(feature)
    return SchemaDict(converted)
def deserialize(inp):
    """Rebuild a schema object from its serialized form.

    Non-dict inputs are returned unchanged. A dict is dispatched on its
    ``"type"`` entry and the matching schema class is constructed from the
    dict's other entries. Nested ``"dtype"``, ``"class_labels"`` and
    ``"items"`` values are deserialized recursively. The compressor argument
    always comes from ``_get_compressor(inp)``.

    Args:
        inp: A serialized schema dict, or any other value.

    Returns:
        The rebuilt schema object; ``inp`` itself when it is not a dict; or
        ``None`` when the dict's ``"type"`` matches none of the handled names.

    Raises:
        KeyError: If a plain dict lacks ``"type"`` or an entry the selected
            branch reads.
    """
    if not isinstance(inp, dict):
        return inp

    # Small accessors for the entries most branches share. Each branch calls
    # them in the order its constructor arguments are listed.
    def _shape():
        return tuple(inp["shape"])

    def _max_shape():
        return tuple(inp["max_shape"])

    def _dtype():
        return deserialize(inp["dtype"])

    def _storage():
        # Chunks are read first, then the compressor is resolved.
        return {"chunks": inp["chunks"], "compressor": _get_compressor(inp)}

    kind = inp["type"]

    if kind == "Audio":
        return Audio(
            shape=_shape(),
            dtype=_dtype(),
            file_format=inp["file_format"],
            sample_rate=inp["sample_rate"],
            max_shape=_max_shape(),
            **_storage(),
        )

    if kind == "BBox":
        return BBox(
            shape=_shape(),
            dtype=_dtype(),
            **_storage(),
            max_shape=_max_shape(),
        )

    if kind == "ClassLabel":
        # Without stored names, the label is rebuilt from its class count.
        if inp["_names"] is None:
            return ClassLabel(
                shape=_shape(),
                dtype=_dtype(),
                num_classes=inp["_num_classes"],
                **_storage(),
                max_shape=_max_shape(),
            )
        return ClassLabel(
            shape=_shape(),
            dtype=_dtype(),
            names=inp["_names"],
            **_storage(),
            max_shape=_max_shape(),
        )

    if kind in ("SchemaDict", "FeatureDict"):
        return SchemaDict(
            {name: deserialize(item) for name, item in inp["items"].items()}
        )

    if kind == "Image":
        return Image(
            shape=_shape(),
            dtype=_dtype(),
            max_shape=_max_shape(),
            **_storage(),
        )

    if kind == "Mask":
        return Mask(shape=_shape(), max_shape=_max_shape(), **_storage())

    if kind == "Polygon":
        return Polygon(
            shape=_shape(),
            max_shape=_max_shape(),
            dtype=_dtype(),
            **_storage(),
        )

    if kind == "Primitive":
        return Primitive(dtype=_dtype(), **_storage())

    if kind == "Segmentation":
        class_labels = deserialize(inp["class_labels"])
        # Mirror the ClassLabel rule: names when present, else the class count.
        if class_labels._names is None:
            return Segmentation(
                shape=_shape(),
                dtype=_dtype(),
                num_classes=class_labels._num_classes,
                max_shape=_max_shape(),
                **_storage(),
            )
        return Segmentation(
            shape=_shape(),
            dtype=_dtype(),
            names=class_labels._names,
            max_shape=_max_shape(),
            **_storage(),
        )

    if kind == "Sequence":
        return Sequence(
            shape=_shape(),
            dtype=_dtype(),
            max_shape=_max_shape(),
            **_storage(),
        )

    if kind == "Tensor":
        # Shape and dtype are passed positionally here.
        return Tensor(_shape(), _dtype(), max_shape=_max_shape(), **_storage())

    if kind == "Text":
        # Shape and dtype are passed positionally here.
        return Text(_shape(), _dtype(), max_shape=_max_shape(), **_storage())

    if kind == "Video":
        return Video(
            shape=_shape(),
            dtype=_dtype(),
            max_shape=_max_shape(),
            **_storage(),
        )

    # No handled type name matched: the result is None.
    return None
def deserialize(inp):
    """Rebuild a schema object from its serialized form.

    Non-dict inputs are returned unchanged. A dict is dispatched on its
    ``"type"`` entry and the matching schema class is constructed from the
    dict's other entries. Nested ``"dtype"``, ``"class_labels"`` and
    ``"items"`` values are deserialized recursively. The compressor argument
    always comes from ``_get_compressor(inp)``.

    Args:
        inp: A serialized schema dict, or any other value.

    Returns:
        The rebuilt schema object; ``inp`` itself when it is not a dict; or
        ``None`` when the dict's ``"type"`` matches none of the handled names.

    Raises:
        KeyError: If a plain dict lacks ``"type"`` or an entry the selected
            branch reads.
    """
    if not isinstance(inp, dict):
        return inp

    # Small accessors for the entries most branches share. Each branch calls
    # them in the order its constructor arguments are listed.
    def _shape():
        return tuple(inp["shape"])

    def _max_shape():
        return tuple(inp["max_shape"])

    def _dtype():
        return deserialize(inp["dtype"])

    def _storage():
        # Chunks are read first, then the compressor is resolved.
        return {"chunks": inp["chunks"], "compressor": _get_compressor(inp)}

    kind = inp["type"]

    if kind == "Audio":
        return Audio(
            shape=_shape(),
            dtype=_dtype(),
            file_format=inp["file_format"],
            sample_rate=inp["sample_rate"],
            max_shape=_max_shape(),
            **_storage(),
        )

    if kind == "BBox":
        return BBox(dtype=_dtype(), **_storage())

    if kind == "ClassLabel":
        # A stored class count takes precedence; otherwise names are used.
        if "_num_classes" in inp:
            return ClassLabel(num_classes=inp["_num_classes"], **_storage())
        return ClassLabel(names=inp["names"], **_storage())

    if kind in ("SchemaDict", "FeatureDict"):
        return SchemaDict(
            {name: deserialize(item) for name, item in inp["items"].items()}
        )

    if kind == "Image":
        # TODO: also pass encoding_format=inp["encoding_format"] once image
        # encoding is added.
        return Image(
            shape=_shape(),
            dtype=_dtype(),
            max_shape=_max_shape(),
            **_storage(),
        )

    if kind == "Mask":
        return Mask(shape=_shape(), max_shape=_max_shape(), **_storage())

    if kind == "Polygon":
        return Polygon(
            shape=_shape(),
            max_shape=_max_shape(),
            dtype=_dtype(),
            **_storage(),
        )

    if kind == "Segmentation":
        class_labels = deserialize(inp["class_labels"])
        # Use the class count when the label object carries one, else names.
        if hasattr(class_labels, "_num_classes"):
            return Segmentation(
                shape=_shape(),
                dtype=_dtype(),
                num_classes=class_labels._num_classes,
                max_shape=_max_shape(),
                **_storage(),
            )
        return Segmentation(
            shape=_shape(),
            dtype=_dtype(),
            names=class_labels.names,
            max_shape=_max_shape(),
            **_storage(),
        )

    if kind == "Sequence":
        return Sequence(
            shape=_shape(),
            dtype=_dtype(),
            max_shape=_max_shape(),
            **_storage(),
        )

    if kind == "Tensor":
        # Shape and dtype are passed positionally here.
        return Tensor(_shape(), _dtype(), max_shape=_max_shape(), **_storage())

    if kind == "Text":
        # Shape and dtype are passed positionally here.
        return Text(_shape(), _dtype(), max_shape=_max_shape(), **_storage())

    if kind == "Video":
        # TODO: also pass encoding_format=inp["encoding_format"] once image
        # encoding is added.
        return Video(
            shape=_shape(),
            dtype=_dtype(),
            max_shape=_max_shape(),
            **_storage(),
        )

    # No handled type name matched: the result is None.
    return None