def from_array(arr, name=None):  # type: (np.ndarray[Any], Optional[Text]) -> TensorProto
    """Converts a numpy array to a tensor def.

    Inputs:
        arr: a numpy array.
        name: (optional) the name of the tensor.
    Returns:
        tensor_def: the converted tensor def.
    Raises:
        NotImplementedError: for object (string) arrays, which are not
            supported by this variant.
        RuntimeError: when the numpy dtype has no TensorProto mapping.
    """
    tensor = TensorProto()
    tensor.dims.extend(arr.shape)
    if name:
        tensor.name = name

    # Fix: `np.object` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `object` matches an object dtype identically.
    if arr.dtype == object:
        # Special care for strings.
        raise NotImplementedError("Need to properly implement string.")

    # For numerical types, directly use numpy raw bytes.
    try:
        dtype = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype]
    except KeyError:
        raise RuntimeError(
            "Numpy data type not understood yet: {}".format(str(arr.dtype)))
    tensor.data_type = dtype
    tensor.raw_data = arr.tobytes()  # note: tobytes() is only after 1.9.

    return tensor
def make_tensor(
        name,  # type: Text
        data_type,  # type: TensorProto.DataType
        dims,  # type: Sequence[int]
        vals,  # type: Any
        raw=False  # type: bool
):  # type: (...) -> TensorProto
    '''
    Make a TensorProto with specified arguments. If raw is False, this
    function will choose the corresponding proto field to store the values
    based on data_type. If raw is True, use "raw_data" proto field to store
    the values, and values should be of type bytes in this case.
    '''
    tensor = TensorProto()
    tensor.name = name
    tensor.data_type = data_type

    # raw_data cannot hold strings; they must go through string_data.
    if data_type == TensorProto.STRING:
        assert not raw, "Can not use raw_data to store string type"

    # Complex values are stored as interleaved (real, imag) pairs.
    if data_type in (TensorProto.COMPLEX64, TensorProto.COMPLEX128):
        vals = split_complex_to_pairs(vals)

    if raw:
        tensor.raw_data = vals
    else:
        storage_field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[
            mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]]
        getattr(tensor, storage_field).extend(vals)

    tensor.dims.extend(dims)
    return tensor
def from_array(arr, name=None):  # type: (np.ndarray[Any], Optional[Text]) -> TensorProto
    """Converts a numpy array to a tensor def.

    Inputs:
        arr: a numpy array.
        name: (optional) the name of the tensor.
    Returns:
        tensor_def: the converted tensor def.
    Raises:
        NotImplementedError: when an object array holds something other than
            strings/bytes (or 1-D arrays of them).
        RuntimeError: when the numpy dtype has no TensorProto mapping.
    """
    tensor = TensorProto()
    tensor.dims.extend(arr.shape)
    if name:
        tensor.name = name

    # Fix: `np.object` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `object` matches an object dtype identically.
    if arr.dtype == object:
        # Special care for strings.
        tensor.data_type = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype]
        # TODO: Introduce full string support.
        # We flatten the array in case there are 2-D arrays are specified
        # We throw the error below if we have a 3-D array or some kind of other
        # object. If you want more complex shapes then follow the below
        # instructions.
        # Unlike other types where the shape is automatically inferred from
        # nested arrays of values, the only reliable way now to feed strings
        # is to put them into a flat array then specify type astype(np.object)
        # (otherwise all strings may have different types depending on their
        # length) and then specify shape .reshape([x, y, z])
        flat_array = arr.flatten()
        for e in flat_array:
            if isinstance(e, text_type):
                tensor.string_data.append(e.encode('utf-8'))
            elif isinstance(e, np.ndarray):
                for s in e:
                    if isinstance(s, text_type):
                        tensor.string_data.append(s.encode('utf-8'))
                    elif isinstance(s, bytes):
                        tensor.string_data.append(s)
            elif isinstance(e, bytes):
                tensor.string_data.append(e)
            else:
                raise NotImplementedError(
                    "Unrecognized object in the object array, expect a string, or array of bytes: ",
                    str(type(e)))
        return tensor

    # For numerical types, directly use numpy raw bytes.
    try:
        dtype = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype]
    except KeyError:
        raise RuntimeError("Numpy data type not understood yet: {}".format(
            str(arr.dtype)))
    tensor.data_type = dtype
    tensor.raw_data = arr.tobytes()  # note: tobytes() is only after 1.9.

    if sys.byteorder == 'big':
        # Convert endian from big to little
        convert_endian(tensor)

    return tensor
def make_external_tensor(name, data_type, dims, raw_data=None, **kwargs):
    """Build a TensorProto whose payload is described via external data.

    Arguments:
        name: tensor name.
        data_type: a TensorProto.DataType value.
        dims: tensor shape.
        raw_data: (optional) serialized payload bytes.
        **kwargs: external-data attributes forwarded to
            external_data_helper.set_external_data.
    Returns:
        TensorProto with external_data entries ordered to match **kwargs.
    """
    tensor = TensorProto()
    tensor.name = name
    tensor.data_type = data_type
    tensor.dims.extend(dims)
    if raw_data is not None:
        tensor.raw_data = raw_data
    external_data_helper.set_external_data(tensor, **kwargs)
    # Keep the repeated external_data field in the caller's keyword order.
    order_repeated_field(tensor.external_data, 'key', kwargs.keys())
    return tensor
def convert_endian(tensor: TensorProto) -> None:
    """Swap the byte order of a tensor's raw data in place.

    @params
    TensorProto: TensorProto to be converted.
    """
    np_dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[tensor.data_type]
    swapped = np.frombuffer(tensor.raw_data, dtype=np_dtype).byteswap()
    tensor.raw_data = swapped.tobytes()
def test_check_string_tensor(self):
    """STRING tensors must use string_data; raw_data must be rejected."""
    tensor = TensorProto()
    tensor.data_type = TensorProto.STRING
    tensor.dims.append(1)
    payload = 'Test'.encode('utf-8')
    tensor.string_data.append(payload)
    checker.check_tensor(tensor)

    # Move the same payload into raw_data, which is illegal for STRING.
    del tensor.string_data[:]
    tensor.raw_data = payload
    # string data should not be stored in raw_data field
    self.assertRaises(checker.ValidationError, checker.check_tensor, tensor)
def test_check_string_tensor(self):  # type: () -> None
    """A STRING tensor passes the checker only via string_data, not raw_data."""
    tensor = TensorProto()
    tensor.data_type = TensorProto.STRING
    tensor.dims.append(1)
    encoded = 'Test'.encode('utf-8')
    tensor.string_data.append(encoded)
    checker.check_tensor(tensor)

    # Relocate the bytes to raw_data; the checker must now reject the tensor.
    del tensor.string_data[:]
    tensor.raw_data = encoded
    # string data should not be stored in raw_data field
    self.assertRaises(checker.ValidationError, checker.check_tensor, tensor)
def make_tensor(
        name,  # type: Text
        data_type,  # type: int
        dims,  # type: Sequence[int]
        vals,  # type: Any
        raw=False  # type: bool
):  # type: (...) -> TensorProto
    '''
    Make a TensorProto with specified arguments. If raw is False, this
    function will choose the corresponding proto field to store the values
    based on data_type. If raw is True, use "raw_data" proto field to store
    the values, and values should be of type bytes in this case.
    '''
    tensor = TensorProto()
    tensor.name = name
    tensor.data_type = data_type

    if data_type == TensorProto.STRING:
        assert not raw, "Can not use raw_data to store string type"

    # Expected element count for typed storage, byte count for raw storage.
    expected_size = (
        mapping.TENSOR_TYPE_TO_NP_TYPE[data_type].itemsize if raw else 1)

    # Flatten a numpy array if its rank > 1
    if type(vals) is np.ndarray and len(vals.shape) > 1:
        vals = vals.flatten()
    for dim in dims:
        expected_size *= dim

    if len(vals) != expected_size:
        raise ValueError(
            "Number of values does not match tensor's size. Expected {}, but it is {}. "
            .format(expected_size, len(vals)))

    if raw:
        tensor.raw_data = vals
    else:
        if data_type in (TensorProto.COMPLEX64, TensorProto.COMPLEX128):
            vals = split_complex_to_pairs(vals)
        elif data_type in (TensorProto.FLOAT16, TensorProto.BFLOAT16):
            # float16/bfloat16 payloads are stored bit-cast as uint16.
            # NOTE(review): bfloat16 is routed through np.float16 here, which
            # is not the bfloat16 bit layout — confirm against the variant
            # that uses float32_to_bfloat16.
            vals = np.array(vals).astype(
                np.float16).view(dtype=np.uint16).flatten().tolist()
        storage_field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[
            mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]]
        getattr(tensor, storage_field).extend(vals)

    tensor.dims.extend(dims)
    return tensor
def make_tensor(name: str, vals: np.ndarray) -> ITensorProto:
    """
    Build a float32 TensorProto from a numpy array.

    The input is always cast to float32 and serialized into the raw_data
    field; the tensor's shape follows the (cast) array's shape.
    """
    as_float32 = vals.astype(np.float32)
    tensor = TensorProto()
    tensor.name = name
    tensor.data_type = DataType.FLOAT
    tensor.dims.extend(as_float32.shape)
    tensor.raw_data = as_float32.tobytes()
    return tensor
def _strip_raw_data(tensor: onnx.TensorProto) -> onnx.TensorProto:
    """Drop a tensor's payload, keeping summary stats as external-data metadata."""
    arr = onnx.numpy_helper.to_array(tensor)
    meta_dict = {
        'type': "stripped",
        'average': float(arr.mean()),  # type: ignore[assignment]
        'variance': float(arr.var()),  # type: ignore[assignment]
    }
    # Ensure raw_data exists before stripping so the proto shape is uniform.
    if not tensor.HasField("raw_data"):
        tensor.raw_data = onnx.numpy_helper.from_array(arr, tensor.name).raw_data
    # The metadata rides in the external-data "location" string as JSON.
    onnx.external_data_helper.set_external_data(
        tensor, location=json.dumps(meta_dict), length=arr.nbytes)
    tensor.data_location = onnx.TensorProto.EXTERNAL
    tensor.ClearField('raw_data')
    tensor.ClearField('float_data')
    return tensor
def make_tensor(
        name,  # type: Text
        data_type,  # type: int
        dims,  # type: Sequence[int]
        vals,  # type: Any
        raw=False  # type: bool
):  # type: (...) -> TensorProto
    '''
    Make a TensorProto with specified arguments. If raw is False, this
    function will choose the corresponding proto field to store the values
    based on data_type. If raw is True, use "raw_data" proto field to store
    the values, and values should be of type bytes in this case.
    '''
    tensor = TensorProto()
    tensor.name = name
    tensor.data_type = data_type

    if data_type == TensorProto.STRING:
        assert not raw, "Can not use raw_data to store string type"

    # Expected element count for typed storage, byte count for raw storage.
    size = mapping.TENSOR_TYPE_TO_NP_TYPE[data_type].itemsize if raw else 1
    for dim in dims:
        size *= dim
    if len(vals) != size:
        raise ValueError("Number of values does not match tensor's size.")

    # Complex values are stored as interleaved (real, imag) pairs.
    if data_type in (TensorProto.COMPLEX64, TensorProto.COMPLEX128):
        vals = split_complex_to_pairs(vals)

    if raw:
        tensor.raw_data = vals
    else:
        storage_field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[
            mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]]
        getattr(tensor, storage_field).extend(vals)

    tensor.dims.extend(dims)
    return tensor
def from_array(arr, name=None):  # type: (np.ndarray[Any], Optional[Text]) -> TensorProto
    """Converts a numpy array to a tensor def.

    Inputs:
        arr: a numpy array.
        name: (optional) the name of the tensor.
    Returns:
        tensor_def: the converted tensor def.
    Raises:
        NotImplementedError: when an object array holds something other than
            strings or numpy arrays of bytes.
        RuntimeError: when the numpy dtype has no TensorProto mapping.
    """
    tensor = TensorProto()
    tensor.dims.extend(arr.shape)
    if name:
        tensor.name = name

    # Fix: `np.object` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `object` matches an object dtype identically.
    if arr.dtype == object:
        # Special care for strings.
        tensor.data_type = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype]
        for e in arr:
            if isinstance(e, text_type):
                tensor.string_data.append(e.encode('utf-8'))
            elif isinstance(e, np.ndarray):
                tensor.string_data.append(e.tobytes())
            else:
                raise NotImplementedError(
                    "Unrecognized object in the object array, expect a string, or array of bytes"
                )
        return tensor

    # For numerical types, directly use numpy raw bytes.
    try:
        dtype = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype]
    except KeyError:
        raise RuntimeError("Numpy data type not understood yet: {}".format(
            str(arr.dtype)))
    tensor.data_type = dtype
    tensor.raw_data = arr.tobytes()  # note: tobytes() is only after 1.9.

    return tensor
def make_tensor(name: str, data_type: int, dims: Sequence[int], vals: Any,
                raw: bool = False) -> TensorProto:
    '''
    Make a TensorProto with specified arguments. If raw is False, this
    function will choose the corresponding proto field to store the values
    based on data_type. If raw is True, use "raw_data" proto field to store
    the values, and values should be of type bytes in this case.

    Arguments:
        name (string): tensor name
        data_type (int): a value such as onnx.TensorProto.FLOAT
        dims (List[int]): shape
        vals: values
        raw (bool): if True, vals contains the seralized content of the tensor,
            otherwise, vals should be a list of values of the type defined by
            *data_type*

    Returns:
        TensorProto
    '''
    tensor = TensorProto()
    tensor.name = name
    tensor.data_type = data_type

    if data_type == TensorProto.STRING:
        assert not raw, "Can not use raw_data to store string type"

    np_dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[data_type]

    # Expected element count for typed storage, byte count for raw storage.
    expected_size = 1
    if raw:
        # NumPy doesn't have BFLOAT16. TENSOR_TYPE_TO_NP_TYPE maps it to
        # float32, which has the wrong itemsize, so hard-code 2 bytes.
        if data_type == TensorProto.BFLOAT16:
            expected_size = 2
        else:
            expected_size = np_dtype.itemsize

    if type(vals) is np.ndarray and len(vals.shape) > 1:
        vals = vals.flatten()
    for dim in dims:
        expected_size *= dim

    if len(vals) != expected_size:
        raise ValueError(
            "Number of values does not match tensor's size. Expected {}, but it is {}. "
            .format(expected_size, len(vals)))

    if raw:
        tensor.raw_data = vals
    else:
        if data_type in (TensorProto.COMPLEX64, TensorProto.COMPLEX128):
            vals = split_complex_to_pairs(vals)
        elif data_type == TensorProto.FLOAT16:
            # float16 payloads are stored bit-cast as uint16.
            vals = np.array(vals).astype(np_dtype).view(
                dtype=np.uint16).flatten().tolist()
        elif data_type == TensorProto.BFLOAT16:
            # bfloat16 has a dedicated float32 -> uint16 truncation helper.
            vals = list(map(
                float32_to_bfloat16,
                np.array(vals).astype(np_dtype).flatten().tolist()))
        storage_field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[
            mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]]
        getattr(tensor, storage_field).extend(vals)

    tensor.dims.extend(dims)
    return tensor