예제 #1
0
 def dict_to_hub(dic, path=""):
     """Translate a (possibly nested) dict of sample values into a ``SchemaDict``.

     Every "/" in a key is replaced by "_".  Nested dicts are converted
     recursively, strings become ``Text`` entries, and every other value
     becomes a ``Tensor`` entry whose dimensions are all ``None``.

     ``max_dict`` is not defined in this function and must be supplied by the
     surrounding scope.  It is indexed with the "/"-joined path of the entry;
     a falsy result falls back to 10000 for every dimension.

     Args:
         dic: mapping from string keys to values or nested dicts.
         path: "/"-joined path of the enclosing dict, used as the prefix of
             the ``max_dict`` lookup key.

     Returns:
         A ``SchemaDict`` holding one entry per key of ``dic``.
     """
     schema = {}
     for raw_key, item in dic.items():
         name = raw_key.replace("/", "_")
         item_path = path + "/" + name

         if isinstance(item, dict):
             schema[name] = dict_to_hub(item, path=item_path)
             continue

         # The shape is read before the torch -> numpy conversion below.
         dims = getattr(item, "shape", ())
         if isinstance(item, torch.Tensor):
             item = item.numpy()

         if isinstance(item, str):
             # Strings always get the same fixed dtype and size limit.
             schema[name] = Text(shape=(None,), dtype="int64", max_shape=(10000,))
             continue

         open_shape = tuple(None for _ in dims)
         size_limit = max_dict[item_path] or tuple(10000 for _ in dims)
         if hasattr(item, "dtype"):
             dtype = item.dtype.name
         else:
             # Plain Python values carry no dtype; their type is passed as-is.
             dtype = type(item)
         schema[name] = Tensor(shape=open_shape, dtype=dtype, max_shape=size_limit)
     return SchemaDict(schema)
예제 #2
0
 def sdict_to_hub(tf_dt, path=""):
     """Convert a mapping of features into a ``SchemaDict``.

     Each key has "/" replaced by "_"; the value is converted with ``to_hub``,
     which also receives the ``max_dict`` entry for the feature's path and the
     path itself.  ``to_hub`` and ``max_dict`` come from the surrounding scope.

     Args:
         tf_dt: mapping from feature name to feature description.
         path: "/"-joined path of the enclosing mapping.

     Returns:
         A ``SchemaDict`` with one converted entry per key of ``tf_dt``.
     """

     def _entry(raw_key, feature):
         # Produce the (sanitised name, converted feature) pair for one item.
         name = raw_key.replace("/", "_")
         feature_path = path + "/" + name
         return name, to_hub(feature, max_dict[feature_path], feature_path)

     return SchemaDict(dict(_entry(k, v) for k, v in tf_dt.items()))
예제 #3
0
 def fdict_to_hub(tf_dt):
     """Convert a flat mapping of features into a ``SchemaDict``.

     Each key has "/" replaced by "_".  The sanitised key is also used to look
     up the extra arguments passed to ``to_hub`` in ``max_dict`` and
     ``bytes_dict``; all three names come from the surrounding scope.

     Args:
         tf_dt: mapping from feature name to feature description.

     Returns:
         A ``SchemaDict`` with one converted entry per key of ``tf_dt``.
     """
     schema = {}
     for raw_key, feature in tf_dt.items():
         name = raw_key.replace("/", "_")
         schema[name] = to_hub(feature, max_dict[name], bytes_dict[name])
     return SchemaDict(schema)
예제 #4
0
 def dict_to_hub(d):
     """Build a ``SchemaDict`` describing the structure of the nested dict ``d``.

     Every "/" in a key is replaced by "_".  Nested dicts are converted
     recursively, strings become ``Text`` entries, and every other value
     becomes a ``Tensor`` entry with one ``None`` dimension (and a max size of
     10000) per dimension of the value's ``shape``.

     The input dict is left untouched.

     Args:
         d: mapping from string keys to values or nested dicts.

     Returns:
         A ``SchemaDict`` with one entry per key of ``d``.
     """
     # Results go into a fresh dict: inserting renamed keys into ``d`` while
     # iterating over it raises RuntimeError, and overwriting its values would
     # replace the caller's data with schema objects.
     schema = {}
     for key, value in d.items():
         key = key.replace("/", "_")
         if isinstance(value, dict):
             schema[key] = dict_to_hub(value)
             continue

         # The shape is read before the torch -> numpy conversion below.
         value_shape = value.shape if hasattr(value, "shape") else ()
         if isinstance(value, torch.Tensor):
             value = value.numpy()

         if isinstance(value, str):
             schema[key] = Text(shape=(None,), dtype="int64", max_shape=(10000,))
             continue

         # Plain Python values carry no dtype; their type is passed as-is.
         dtype = value.dtype.name if hasattr(value, "dtype") else type(value)
         schema[key] = Tensor(
             shape=tuple(None for _ in value_shape),
             dtype=dtype,
             max_shape=tuple(10000 for _ in value_shape),
         )
     return SchemaDict(schema)
예제 #5
0
def test_feature_dict_repr():
    """Check the repr of a ``SchemaDict`` built from the Python types int and str."""
    schema = SchemaDict({"myint": int, "mystr": str})
    expected = "SchemaDict({'myint': 'int64', 'mystr': '<U0'})"
    actual = schema.__repr__()
    assert actual == expected
예제 #6
0
 def dict_to_hub(tf_dt):
     """Convert a mapping of features into a ``SchemaDict``.

     Each key has "/" replaced by "_" and each value is converted with
     ``tf_to_hub``, which comes from the surrounding scope.

     Args:
         tf_dt: mapping from feature name to feature description.

     Returns:
         A ``SchemaDict`` with one converted entry per key of ``tf_dt``.
     """
     schema = {}
     for raw_key, feature in tf_dt.items():
         name = raw_key.replace("/", "_")
         schema[name] = tf_to_hub(feature)
     return SchemaDict(schema)
예제 #7
0
def deserialize(inp):
    """Rebuild a schema object from its serialized representation.

    Anything that is not a dict is returned unchanged.  A dict is dispatched
    on ``inp["type"]`` to the schema class of the same name, with nested
    ``"dtype"``, ``"class_labels"`` and ``"items"`` entries deserialized
    recursively.  ``"FeatureDict"`` is accepted as an alias of
    ``"SchemaDict"``.  The compressor argument always comes from
    ``_get_compressor(inp)``.

    Args:
        inp: a serialized schema dict, or any other value.

    Returns:
        The reconstructed schema object, ``inp`` itself when it is not a
        dict, or ``None`` when the dict's ``"type"`` matches no branch.

    Raises:
        KeyError: if a plain dict lacks ``"type"`` or a field read by the
            selected branch.
    """
    if not isinstance(inp, dict):
        return inp

    kind = inp["type"]

    if kind == "Audio":
        return Audio(
            shape=tuple(inp["shape"]),
            dtype=deserialize(inp["dtype"]),
            file_format=inp["file_format"],
            sample_rate=inp["sample_rate"],
            max_shape=tuple(inp["max_shape"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind == "BBox":
        return BBox(
            shape=tuple(inp["shape"]),
            dtype=deserialize(inp["dtype"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
            max_shape=tuple(inp["max_shape"]),
        )

    if kind == "ClassLabel":
        # Explicit names win; otherwise only the class count is passed on.
        names = inp["_names"]
        if names is not None:
            label_spec = {"names": names}
        else:
            label_spec = {"num_classes": inp["_num_classes"]}
        return ClassLabel(
            shape=tuple(inp["shape"]),
            dtype=deserialize(inp["dtype"]),
            **label_spec,
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
            max_shape=tuple(inp["max_shape"]),
        )

    if kind in ("SchemaDict", "FeatureDict"):
        return SchemaDict(
            {key: deserialize(value) for key, value in inp["items"].items()}
        )

    if kind in ("Image", "Sequence", "Video"):
        # These three schemas are built from exactly the same fields.
        if kind == "Image":
            schema_cls = Image
        elif kind == "Sequence":
            schema_cls = Sequence
        else:
            schema_cls = Video
        return schema_cls(
            shape=tuple(inp["shape"]),
            dtype=deserialize(inp["dtype"]),
            max_shape=tuple(inp["max_shape"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind == "Mask":
        return Mask(
            shape=tuple(inp["shape"]),
            max_shape=tuple(inp["max_shape"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind == "Polygon":
        return Polygon(
            shape=tuple(inp["shape"]),
            max_shape=tuple(inp["max_shape"]),
            dtype=deserialize(inp["dtype"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind == "Primitive":
        return Primitive(
            dtype=deserialize(inp["dtype"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind == "Segmentation":
        # The label description is itself serialized; rebuild it first and
        # forward either its names or its class count.
        class_labels = deserialize(inp["class_labels"])
        if class_labels._names is not None:
            label_spec = {"names": class_labels._names}
        else:
            label_spec = {"num_classes": class_labels._num_classes}
        return Segmentation(
            shape=tuple(inp["shape"]),
            dtype=deserialize(inp["dtype"]),
            **label_spec,
            max_shape=tuple(inp["max_shape"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind in ("Tensor", "Text"):
        # Both take shape and dtype positionally, followed by the same keywords.
        schema_cls = Tensor if kind == "Tensor" else Text
        return schema_cls(
            tuple(inp["shape"]),
            deserialize(inp["dtype"]),
            max_shape=tuple(inp["max_shape"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    # Unrecognised type tags yield None.
    return None
예제 #8
0
def deserialize(inp):
    """Rebuild a schema object from its serialized representation.

    Anything that is not a dict is returned unchanged.  A dict is dispatched
    on ``inp["type"]`` to the schema class of the same name, with nested
    ``"dtype"``, ``"class_labels"`` and ``"items"`` entries deserialized
    recursively.  ``"FeatureDict"`` is accepted as an alias of
    ``"SchemaDict"``.  The compressor argument always comes from
    ``_get_compressor(inp)``.

    Args:
        inp: a serialized schema dict, or any other value.

    Returns:
        The reconstructed schema object, ``inp`` itself when it is not a
        dict, or ``None`` when the dict's ``"type"`` matches no branch.

    Raises:
        KeyError: if a plain dict lacks ``"type"`` or a field read by the
            selected branch.
    """
    if not isinstance(inp, dict):
        return inp

    kind = inp["type"]

    if kind == "Audio":
        return Audio(
            shape=tuple(inp["shape"]),
            dtype=deserialize(inp["dtype"]),
            file_format=inp["file_format"],
            sample_rate=inp["sample_rate"],
            max_shape=tuple(inp["max_shape"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind == "BBox":
        return BBox(
            dtype=deserialize(inp["dtype"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind == "ClassLabel":
        # A stored class count wins; otherwise the label names are required.
        if "_num_classes" in inp:
            label_spec = {"num_classes": inp["_num_classes"]}
        else:
            label_spec = {"names": inp["names"]}
        return ClassLabel(
            **label_spec,
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind in ("SchemaDict", "FeatureDict"):
        return SchemaDict(
            {key: deserialize(value) for key, value in inp["items"].items()}
        )

    if kind in ("Image", "Sequence", "Video"):
        # These three schemas are built from exactly the same fields.
        if kind == "Image":
            schema_cls = Image
        elif kind == "Sequence":
            schema_cls = Sequence
        else:
            schema_cls = Video
        # TODO: for Image and Video, also pass
        # encoding_format=inp["encoding_format"] once image encoding is added.
        return schema_cls(
            shape=tuple(inp["shape"]),
            dtype=deserialize(inp["dtype"]),
            max_shape=tuple(inp["max_shape"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind == "Mask":
        return Mask(
            shape=tuple(inp["shape"]),
            max_shape=tuple(inp["max_shape"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind == "Polygon":
        return Polygon(
            shape=tuple(inp["shape"]),
            max_shape=tuple(inp["max_shape"]),
            dtype=deserialize(inp["dtype"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind == "Segmentation":
        # The label description is itself serialized; rebuild it first and
        # forward either its class count or its names.
        class_labels = deserialize(inp["class_labels"])
        if hasattr(class_labels, "_num_classes"):
            label_spec = {"num_classes": class_labels._num_classes}
        else:
            label_spec = {"names": class_labels.names}
        return Segmentation(
            shape=tuple(inp["shape"]),
            dtype=deserialize(inp["dtype"]),
            **label_spec,
            max_shape=tuple(inp["max_shape"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    if kind in ("Tensor", "Text"):
        # Both take shape and dtype positionally, followed by the same keywords.
        schema_cls = Tensor if kind == "Tensor" else Text
        return schema_cls(
            tuple(inp["shape"]),
            deserialize(inp["dtype"]),
            max_shape=tuple(inp["max_shape"]),
            chunks=inp["chunks"],
            compressor=_get_compressor(inp),
        )

    # Unrecognised type tags yield None.
    return None