Esempio n. 1
0
def from_array(arr, name=None):  # type: (np.ndarray[Any], Optional[Text]) -> TensorProto
    """Converts a numpy array to a tensor def.

    The tensor's dims are taken from ``arr.shape`` and the array's bytes are
    stored in the ``raw_data`` field.

    Inputs:
        arr: a numpy array with a numerical dtype.
        name: (optional) the name of the tensor.
    Returns:
        tensor_def: the converted tensor def.
    Raises:
        NotImplementedError: if arr has the object dtype (strings are not
            handled by this function).
        RuntimeError: if arr.dtype has no entry in
            mapping.NP_TYPE_TO_TENSOR_TYPE.
    """
    tensor = TensorProto()
    tensor.dims.extend(arr.shape)
    if name:
        tensor.name = name

    # Compare against np.object_ rather than the np.object alias: the alias
    # was deprecated in NumPy 1.20 and removed in 1.24, where merely
    # evaluating it raises AttributeError.
    if arr.dtype == np.object_:
        # Special care for strings.
        raise NotImplementedError("Need to properly implement string.")
    # For numerical types, directly use numpy raw bytes.
    try:
        dtype = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype]
    except KeyError:
        raise RuntimeError(
            "Numpy data type not understood yet: {}".format(str(arr.dtype)))
    tensor.data_type = dtype
    tensor.raw_data = arr.tobytes()  # note: tobytes() is only after 1.9.

    return tensor
Esempio n. 2
0
def make_tensor(
        name,  # type: Text
        data_type,  # type: TensorProto.DataType
        dims,  # type: Sequence[int]
        vals,  # type: Any
        raw=False  # type: bool
):  # type: (...) -> TensorProto
    '''
    Make a TensorProto with specified arguments.  If raw is False, this
    function will choose the corresponding proto field to store the
    values based on data_type. If raw is True, use "raw_data" proto
    field to store the values, and values should be of type bytes in
    this case.

    With raw=False, COMPLEX64 / COMPLEX128 values are passed through
    split_complex_to_pairs before being stored. Raw bytes are always
    stored untouched.
    '''
    tensor = TensorProto()
    tensor.data_type = data_type
    tensor.name = name

    if data_type == TensorProto.STRING:
        assert not raw, "Can not use raw_data to store string type"

    if raw:
        tensor.raw_data = vals
    else:
        # Split complex numbers only for typed values: doing it on a raw
        # bytes payload would replace the bytes with a list, which cannot be
        # assigned to raw_data.
        if data_type in (TensorProto.COMPLEX64, TensorProto.COMPLEX128):
            vals = split_complex_to_pairs(vals)
        field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[
            mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]]
        getattr(tensor, field).extend(vals)

    tensor.dims.extend(dims)
    return tensor
Esempio n. 3
0
def from_array(
        arr,
        name=None):  # type: (np.ndarray[Any], Optional[Text]) -> TensorProto
    """Converts a numpy array to a tensor def.

    Object arrays are stored element by element in ``string_data``; every
    other dtype is stored as the array's bytes in ``raw_data``.

    Inputs:
        arr: a numpy array.
        name: (optional) the name of the tensor.
    Returns:
        tensor_def: the converted tensor def.
    Raises:
        NotImplementedError: if an object array holds an element that is
            not text, bytes or a numpy array.
        RuntimeError: if arr.dtype has no entry in
            mapping.NP_TYPE_TO_TENSOR_TYPE.
    """
    tensor = TensorProto()
    tensor.dims.extend(arr.shape)
    if name:
        tensor.name = name

    # Compare against np.object_ rather than the np.object alias: the alias
    # was deprecated in NumPy 1.20 and removed in 1.24.
    if arr.dtype == np.object_:
        # Special care for strings.
        tensor.data_type = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype]
        # TODO: Introduce full string support.
        # The array is flattened so that 2-D arrays can be handled.
        # Unlike other types where the shape is automatically inferred from
        # nested arrays of values, the only reliable way now to feed strings
        # is to put them into a flat array, then specify the type with
        # astype(object) (otherwise all strings may have different types
        # depending on their length) and then specify the shape with
        # .reshape([x, y, z]).
        flat_array = arr.flatten()
        for e in flat_array:
            if isinstance(e, text_type):
                tensor.string_data.append(e.encode('utf-8'))
            elif isinstance(e, np.ndarray):
                # Nested array: store its text / bytes items one by one.
                # Items of any other type are skipped rather than rejected.
                for s in e:
                    if isinstance(s, text_type):
                        tensor.string_data.append(s.encode('utf-8'))
                    elif isinstance(s, bytes):
                        tensor.string_data.append(s)
            elif isinstance(e, bytes):
                tensor.string_data.append(e)
            else:
                # Build a single message; passing the type as a second
                # positional argument made the exception render as a tuple.
                raise NotImplementedError(
                    "Unrecognized object in the object array, expect a string, or array of bytes: {}"
                    .format(str(type(e))))
        return tensor

    # For numerical types, directly use numpy raw bytes.
    try:
        dtype = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype]
    except KeyError:
        raise RuntimeError("Numpy data type not understood yet: {}".format(
            str(arr.dtype)))
    tensor.data_type = dtype
    tensor.raw_data = arr.tobytes()  # note: tobytes() is only after 1.9.
    if sys.byteorder == 'big':
        # Convert endian from big to little
        convert_endian(tensor)

    return tensor
Esempio n. 4
0
def make_external_tensor(name, data_type, dims, raw_data=None, **kwargs):
    """Build a TensorProto and register external-data settings on it.

    Inputs:
        name: the name of the tensor.
        data_type: value assigned to the tensor's data_type field.
        dims: iterable of dimensions appended to the tensor's dims.
        raw_data: (optional) payload assigned to raw_data when not None.
        **kwargs: forwarded unchanged to
            external_data_helper.set_external_data.
    Returns:
        the populated TensorProto.
    """
    proto = TensorProto()
    proto.data_type = data_type
    proto.name = name
    proto.dims.extend(dims)

    has_payload = raw_data is not None
    if has_payload:
        proto.raw_data = raw_data

    external_data_helper.set_external_data(proto, **kwargs)
    # NOTE(review): presumably this reorders the external_data entries by
    # their 'key' so they follow the keyword order -- confirm against
    # order_repeated_field.
    key_order = kwargs.keys()
    order_repeated_field(proto.external_data, 'key', key_order)
    return proto
Esempio n. 5
0
def convert_endian(tensor: TensorProto) -> None:
    """
    Swap the byte order of the raw data stored in a tensor, in place.

    @params
    tensor: TensorProto whose raw_data is rewritten. Its data_type selects,
        through mapping.TENSOR_TYPE_TO_NP_TYPE, the element type used to
        interpret the bytes.
    """
    element_type = mapping.TENSOR_TYPE_TO_NP_TYPE[tensor.data_type]
    elements = np.frombuffer(tensor.raw_data, dtype=element_type)
    swapped = elements.byteswap()
    tensor.raw_data = swapped.tobytes()
Esempio n. 6
0
    def test_check_string_tensor(self):
        # A one-element STRING tensor with its value in string_data must
        # pass the checker.
        payload = 'Test'.encode('utf-8')
        tensor = TensorProto()
        tensor.data_type = TensorProto.STRING
        tensor.dims.append(1)
        tensor.string_data.append(payload)
        checker.check_tensor(tensor)

        # Move the same bytes into raw_data instead: string data should not
        # be stored in the raw_data field, so the checker must reject it.
        del tensor.string_data[:]
        tensor.raw_data = payload
        with self.assertRaises(checker.ValidationError):
            checker.check_tensor(tensor)
Esempio n. 7
0
    def test_check_string_tensor(self):  # type: () -> None
        # Valid case: STRING tensor of one element kept in string_data.
        encoded_text = 'Test'.encode('utf-8')
        string_tensor = TensorProto()
        string_tensor.data_type = TensorProto.STRING
        string_tensor.dims.append(1)
        string_tensor.string_data.append(encoded_text)
        checker.check_tensor(string_tensor)

        # Invalid case: empty string_data and the bytes placed in raw_data.
        # string data should not be stored in raw_data field
        del string_tensor.string_data[:]
        string_tensor.raw_data = encoded_text
        with self.assertRaises(checker.ValidationError):
            checker.check_tensor(string_tensor)
Esempio n. 8
0
def _float32_to_bfloat16_bits(vals):  # type: (Any) -> List[int]
    """Return the bfloat16 bit patterns of vals as a flat list of ints."""
    as_float32 = np.asarray(vals, dtype=np.float32).reshape(-1)
    # Widen to 64 bits so that adding the rounding bias cannot wrap around.
    bits = as_float32.view(np.uint32).astype(np.uint64)
    # bfloat16 keeps the upper 16 bits of float32; round the dropped half
    # to nearest, ties to even.
    rounded = (bits + 0x7FFF + ((bits >> 16) & 1)) >> 16
    # Rounding a NaN could carry into the exponent; emit a quiet NaN instead.
    rounded[np.isnan(as_float32)] = 0x7FC0
    return rounded.tolist()


def make_tensor(
        name,  # type: Text
        data_type,  # type: int
        dims,  # type: Sequence[int]
        vals,  # type: Any
        raw=False  # type: bool
):  # type: (...) -> TensorProto
    '''
    Make a TensorProto with specified arguments.  If raw is False, this
    function will choose the corresponding proto field to store the
    values based on data_type. If raw is True, use "raw_data" proto
    field to store the values, and values should be of type bytes in
    this case.

    Arguments:
        name: tensor name
        data_type: a TensorProto data type value such as TensorProto.FLOAT
        dims: shape of the tensor
        vals: the values; bytes when raw is True, otherwise a sequence or
            numpy array (arrays of rank > 1 are flattened)
        raw: whether vals is the serialized content of the tensor

    Returns:
        TensorProto

    Raises:
        ValueError: if the number of values (or of bytes when raw is True)
            does not match the size implied by dims.
    '''
    tensor = TensorProto()
    tensor.data_type = data_type
    tensor.name = name

    if data_type == TensorProto.STRING:
        assert not raw, "Can not use raw_data to store string type"

    # Check number of vals specified equals tensor size. For raw data the
    # unit is the element width in bytes.
    if not raw:
        expected_size = 1
    elif data_type == TensorProto.BFLOAT16:
        # bfloat16 is always 2 bytes wide; NumPy has no such dtype, so the
        # width of whatever dtype the mapping uses for it cannot be trusted.
        expected_size = 2
    else:
        expected_size = mapping.TENSOR_TYPE_TO_NP_TYPE[data_type].itemsize
    # Flatten a numpy array if its rank > 1
    if isinstance(vals, np.ndarray) and vals.ndim > 1:
        vals = vals.flatten()
    for d in dims:
        expected_size = expected_size * d

    if len(vals) != expected_size:
        raise ValueError(
            "Number of values does not match tensor's size. Expected {}, but it is {}. "
            .format(expected_size, len(vals)))

    if raw:
        tensor.raw_data = vals
    else:
        if (data_type == TensorProto.COMPLEX64
                or data_type == TensorProto.COMPLEX128):
            vals = split_complex_to_pairs(vals)
        # float16 is stored as the uint16 view of its IEEE half bits
        elif data_type == TensorProto.FLOAT16:
            vals = np.array(vals).astype(
                np.float16).view(dtype=np.uint16).flatten().tolist()
        # bfloat16 is not IEEE half: it is the upper 16 bits of a float32,
        # so it needs its own conversion instead of the float16 view.
        elif data_type == TensorProto.BFLOAT16:
            vals = _float32_to_bfloat16_bits(vals)
        field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[
            mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]]
        getattr(tensor, field).extend(vals)
    tensor.dims.extend(dims)
    return tensor
Esempio n. 9
0
def make_tensor(name: str, vals: np.ndarray) -> ITensorProto:
    """
    Make a FLOAT TensorProto that holds the given values as raw bytes.

    The values are converted to 32-bit floats and serialized into the
    "raw_data" proto field; the tensor's dims are taken from the shape of
    the values.

    Arguments:
        name: tensor name
        vals: the values, as a numpy array (or array-like) of any dtype
            that can be cast to float32

    Returns:
        TensorProto with data_type FLOAT
    """
    # "<f4" is float32 pinned to little-endian, so the serialized bytes have
    # the same layout on big-endian hosts too. np.asarray lets plain
    # sequences through as well as arrays.
    vals = np.asarray(vals).astype("<f4")

    tensor = TensorProto()
    tensor.data_type = DataType.FLOAT
    tensor.name = name
    tensor.raw_data = vals.tobytes()
    tensor.dims.extend(vals.shape)
    return tensor
def _strip_raw_data(tensor: onnx.TensorProto) -> onnx.TensorProto:
    """Replace a tensor's stored values with summary metadata, in place.

    The tensor is marked as EXTERNAL, its external-data location is set to a
    JSON string holding the mean and variance of its values, and its
    raw_data and float_data fields are cleared.

    Returns:
        the same tensor object that was passed in.
    """
    values = onnx.numpy_helper.to_array(tensor)
    summary = {
        'type': "stripped",
        'average': float(values.mean()),
        'variance': float(values.var()),
    }

    # NOTE(review): raw_data is filled in here only to be cleared below --
    # presumably set_external_data expects the field to be present; confirm.
    if not tensor.HasField("raw_data"):
        rebuilt = onnx.numpy_helper.from_array(values, tensor.name)
        tensor.raw_data = rebuilt.raw_data

    onnx.external_data_helper.set_external_data(
        tensor, location=json.dumps(summary), length=values.nbytes)
    tensor.data_location = onnx.TensorProto.EXTERNAL

    for field_name in ('raw_data', 'float_data'):
        tensor.ClearField(field_name)
    return tensor
Esempio n. 11
0
File: helper.py Progetto: zoq/onnx
def make_tensor(
        name,  # type: Text
        data_type,  # type: int
        dims,  # type: Sequence[int]
        vals,  # type: Any
        raw=False  # type: bool
):  # type: (...) -> TensorProto
    '''
    Make a TensorProto with specified arguments.  If raw is False, this
    function will choose the corresponding proto field to store the
    values based on data_type. If raw is True, use "raw_data" proto
    field to store the values, and values should be of type bytes in
    this case.

    With raw=False, COMPLEX64 / COMPLEX128 values are passed through
    split_complex_to_pairs before being stored. Raw bytes are always
    stored untouched.

    Raises ValueError when the number of values (or of bytes when raw is
    True) does not match the size implied by dims.
    '''
    tensor = TensorProto()
    tensor.data_type = data_type
    tensor.name = name

    if data_type == TensorProto.STRING:
        assert not raw, "Can not use raw_data to store string type"

    # Check number of vals specified equals tensor size. For raw data the
    # unit is the element width in bytes.
    size = 1 if (not raw) else (
        mapping.TENSOR_TYPE_TO_NP_TYPE[data_type].itemsize)
    for d in dims:
        size = size * d
    if len(vals) != size:
        raise ValueError("Number of values does not match tensor's size.")

    if raw:
        tensor.raw_data = vals
    else:
        # Split complex numbers only for typed values: doing it on a raw
        # bytes payload would replace the bytes with a list, which cannot be
        # assigned to raw_data.
        if data_type in (TensorProto.COMPLEX64, TensorProto.COMPLEX128):
            vals = split_complex_to_pairs(vals)
        field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[
            mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]]
        getattr(tensor, field).extend(vals)
    tensor.dims.extend(dims)
    return tensor
Esempio n. 12
0
def from_array(
        arr,
        name=None):  # type: (np.ndarray[Any], Optional[Text]) -> TensorProto
    """Converts a numpy array to a tensor def.

    Object arrays are stored element by element in ``string_data``; every
    other dtype is stored as the array's bytes in ``raw_data``.

    Inputs:
        arr: a numpy array.
        name: (optional) the name of the tensor.
    Returns:
        tensor_def: the converted tensor def.
    Raises:
        NotImplementedError: if an object array holds an element that is
            not text, bytes or a numpy array.
        RuntimeError: if arr.dtype has no entry in
            mapping.NP_TYPE_TO_TENSOR_TYPE.
    """
    tensor = TensorProto()
    tensor.dims.extend(arr.shape)
    if name:
        tensor.name = name

    # Compare against np.object_ rather than the np.object alias: the alias
    # was deprecated in NumPy 1.20 and removed in 1.24.
    if arr.dtype == np.object_:
        # Special care for strings.
        tensor.data_type = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype]
        # Walk the flattened array: iterating an N-D object array directly
        # yields whole rows (themselves object arrays, whose tobytes() is
        # just pointer values) instead of the stored elements.
        for e in arr.flatten():
            if isinstance(e, text_type):
                tensor.string_data.append(e.encode('utf-8'))
            elif isinstance(e, bytes):
                # Already encoded; store as is.
                tensor.string_data.append(e)
            elif isinstance(e, np.ndarray):
                tensor.string_data.append(e.tobytes())
            else:
                raise NotImplementedError(
                    "Unrecognized object in the object array, expect a string, or array of bytes"
                )
        return tensor

    # For numerical types, directly use numpy raw bytes.
    try:
        dtype = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype]
    except KeyError:
        raise RuntimeError("Numpy data type not understood yet: {}".format(
            str(arr.dtype)))
    tensor.data_type = dtype
    tensor.raw_data = arr.tobytes()  # note: tobytes() is only after 1.9.

    return tensor
Esempio n. 13
0
def make_tensor(name: str,
                data_type: int,
                dims: Sequence[int],
                vals: Any,
                raw: bool = False) -> TensorProto:
    '''
    Build a TensorProto from a name, an element type, a shape and values.

    When raw is True the values are stored untouched in the "raw_data"
    proto field and are expected to be bytes. Otherwise the proto field is
    picked from data_type and the values are appended to it, after being
    converted for the complex, FLOAT16 and BFLOAT16 types.

    Arguments:
        name (string): tensor name
        data_type (int): a value such as onnx.TensorProto.FLOAT
        dims (List[int]): shape
        vals: values
        raw (bool): if True, vals contains the serialized content of the
            tensor, otherwise, vals should be a list of values of the type
            defined by *data_type*

    Returns:
        TensorProto

    Raises:
        ValueError: if the number of values (or of bytes when raw is True)
            does not match the size implied by dims.
    '''
    tensor = TensorProto()
    tensor.data_type = data_type
    tensor.name = name

    if data_type == TensorProto.STRING:
        assert not raw, "Can not use raw_data to store string type"

    np_dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[data_type]

    # Size of one element in the unit being counted: one value for typed
    # data, the element width in bytes for raw data.
    if not raw:
        unit = 1
    elif data_type == TensorProto.BFLOAT16:
        # NumPy doesn't have BFLOAT16. TENSOR_TYPE_TO_NP_TYPE maps it to
        # float32, which has the wrong itemsize.
        unit = 2
    else:
        unit = np_dtype.itemsize

    if type(vals) is np.ndarray and vals.ndim > 1:
        vals = vals.flatten()

    # Check number of vals specified equals tensor size
    expected_size = unit
    for extent in dims:
        expected_size = expected_size * extent

    actual_size = len(vals)
    if actual_size != expected_size:
        raise ValueError(
            "Number of values does not match tensor's size. Expected {}, but it is {}. "
            .format(expected_size, actual_size))

    if raw:
        tensor.raw_data = vals
        tensor.dims.extend(dims)
        return tensor

    if data_type in (TensorProto.COMPLEX64, TensorProto.COMPLEX128):
        vals = split_complex_to_pairs(vals)
    elif data_type == TensorProto.FLOAT16:
        halves = np.array(vals).astype(np_dtype)
        vals = halves.view(dtype=np.uint16).flatten().tolist()
    elif data_type == TensorProto.BFLOAT16:
        floats = np.array(vals).astype(np_dtype).flatten().tolist()
        vals = [float32_to_bfloat16(value) for value in floats]

    storage_type = mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]
    field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[storage_type]
    getattr(tensor, field).extend(vals)
    tensor.dims.extend(dims)
    return tensor