def FeedBlob(ofblob):
    """Copy a mirrored list of numpy arrays from the enclosing `feed_ctx`
    into `ofblob`.

    Validates that every element is an ndarray of the numpy dtype matching
    the blob's OneFlow dtype before copying.

    Raises:
        ValueError: if the underlying copy reports failure.
    """
    assert ofblob.is_tensor_list
    arrays = feed_ctx.GetMirroredTensorList(ofblob.static_shape)
    assert isinstance(arrays, (list, tuple))
    assert all(isinstance(arr, numpy.ndarray) for arr in arrays)
    expected_dtype = dtype_util.convert_oneflow_dtype_to_numpy_dtype(
        ofblob.dtype)
    assert all(arr.dtype == expected_dtype for arr in arrays)
    # Explicit `is False` kept: the copy API signals failure with False.
    if ofblob.CopyFromNdarrayList(arrays) is False:
        raise ValueError
def FeedBlob(ofblob):
    """Copy one fixed-shape numpy array from the enclosing `feed_ctx`
    into `ofblob`, checking dtype and static shape first.

    Raises:
        ValueError: if the underlying copy reports failure.
    """
    array = feed_ctx.GetFixedTensor(blob_def.shape)
    expected_dtype = dtype_util.convert_oneflow_dtype_to_numpy_dtype(
        ofblob.dtype)
    assert array.dtype == expected_dtype, "%s v.s. %s" % (array.dtype,
                                                          expected_dtype)
    assert array.shape == ofblob.static_shape, "%s v.s. %s" % (
        array.shape,
        ofblob.static_shape,
    )
    # Explicit `is False` kept: the copy API signals failure with False.
    if ofblob.CopyFromNdarray(array) is False:
        raise ValueError
def _ReadSlice(
    container: ValueContainer,
) -> Iterable[Tuple[Sequence[int], Sequence[int], np.ndarray]]:
    """
    Return a generator which iterates over the input blob, tensor, file-backed
    variable, or array and yields (start_nd_idx, stop_nd_idx, slice_np_array).

    Dispatches on the concrete container type; all branches delegate the
    slicing schedule to `_ForEachSlice` and only differ in how one slice
    is materialized.

    Raises:
        RuntimeError: if `container` is not one of the supported types.
    """
    if isinstance(container, oneflow.Tensor):

        def ReadFromTensor(tensor, start_nd_idx, stop_nd_idx):
            start_nd_idx = list(map(int, start_nd_idx))
            stop_nd_idx = list(map(int, stop_nd_idx))
            return tensor[tuple([
                slice(start_nd_idx[i], stop_nd_idx[i])
                for i in range(len(start_nd_idx))
            ])].numpy()

        yield from _ForEachSlice(container, ReadFromTensor)
    elif isinstance(container, EagerBlobTrait):

        def ReadFromEagerBlob(eager_blob, start_nd_idx, stop_nd_idx):
            scope_symbol_id = _GetScopeSymbolIdFromEagerBlob(eager_blob)
            return _LogicalSlice(eager_blob.blob_object, start_nd_idx,
                                 stop_nd_idx, scope_symbol_id)

        yield from _ForEachSlice(container, ReadFromEagerBlob)
    elif isinstance(container, FileBackendVariableBlob):
        np_dtype = np.dtype(
            dtype_util.convert_oneflow_dtype_to_numpy_dtype(container.dtype))
        with open(container.file_path, "rb") as f:

            def ReadFromFile(_, start_nd_idx, stop_nd_idx):
                length = _ElemCnt(
                    np.array(stop_nd_idx) - np.array(start_nd_idx))
                # Renamed from `slice` — the original shadowed the builtin
                # `slice` used by the sibling branches of this function.
                raw_bytes = f.read(length * np_dtype.itemsize)
                return np.frombuffer(
                    raw_bytes,
                    dtype=np_dtype,
                ).reshape(np.array(stop_nd_idx) - np.array(start_nd_idx))

            yield from _ForEachSlice(container, ReadFromFile)
    elif isinstance(container, np.ndarray):

        def ReadFromNpArray(array, start_nd_idx, stop_nd_idx):
            slice_objs = [
                slice(start, stop)
                for start, stop in zip(start_nd_idx, stop_nd_idx)
            ]
            return array[tuple(slice_objs)]

        yield from _ForEachSlice(container, ReadFromNpArray)
    else:
        raise RuntimeError("Unknown type: {}".format(type(container).__name__))
def GenerateValueAndAssign(var_blob, start_nd_idx, stop_nd_idx):
    """Generate one slice of values with the enclosing `initializer` and
    assign it into `var_blob` over [start_nd_idx, stop_nd_idx)."""
    np_dtype = np.dtype(
        dtype_util.convert_oneflow_dtype_to_numpy_dtype(var_blob.dtype))
    slice_shape = np.array(stop_nd_idx) - np.array(start_nd_idx)
    flat_count = _ElemCnt(slice_shape)
    vals = np.array(initializer(flat_count)).astype(np_dtype).reshape(
        slice_shape)
    slice_value_blob = _GetCpu0VariableBlobFromNumpy(vals, var_blob.dtype)
    _LogicalSliceAssign(var_blob, slice_value_blob, start_nd_idx, stop_nd_idx)
def _ForEachSlice(
    container: ValueContainer,
    f: Union[Callable[[EagerBlobTrait, Sequence[int], Sequence[int]], Any],
             Callable[[FileBackendVariableBlob, Sequence[int], Sequence[int]], Any],
             Callable[[np.ndarray, Sequence[int], Sequence[int]], Any],
    ],
):
    """
    Slice container into slices whose size < SLICE_BYTES. For every slice,
    yield start_nd_idx, stop_nd_idx and f(slice)

    Slices are taken contiguously in row-major (flattened) order, so
    concatenating them reconstructs the whole container.
    """
    assert isinstance(container, (EagerBlobTrait, FileBackendVariableBlob,
                                  np.ndarray, oneflow.Tensor)), "Unknown type: {}".format(
        type(container).__name__)
    assert container.shape is not None
    # For current implementation (transport data by grpc), SLICE_BYTES must be lower than 64M
    SLICE_BYTES = 32 * 1024 * 1024
    if isinstance(container, np.ndarray):
        np_dtype = container.dtype
    else:
        np_dtype = np.dtype(
            dtype_util.convert_oneflow_dtype_to_numpy_dtype(container.dtype))
    # Max number of elements per slice given the byte budget.
    SLICE_LEN = SLICE_BYTES // np_dtype.itemsize
    start_idx = 0
    size = _ElemCnt(container.shape)
    cnt = 1
    # Walk axes from the innermost outwards until the accumulated element
    # count exceeds SLICE_LEN; `axis` is then the outermost axis we must
    # split along (if the whole container fits, axis ends at 0).
    for axis in reversed(range(len(container.shape))):
        cnt *= container.shape[axis]
        if cnt > SLICE_LEN:
            break
    # Element count of one index step along `axis` (the trailing dims).
    unit_size = _ElemCnt(tuple(container.shape)[axis + 1:])
    max_unit_num = SLICE_LEN // unit_size
    while start_idx < size:
        # Consume the extent of `axis` in chunks of at most max_unit_num
        # units, so each chunk stays within the byte budget.
        remainder = container.shape[axis]
        while remainder > 0:
            unit_num = max_unit_num if remainder >= max_unit_num else remainder
            length = unit_num * unit_size
            remainder -= unit_num
            stop_idx = start_idx + length
            start_nd_idx = np.unravel_index(start_idx, container.shape)
            # Unravel the last element of the chunk (stop_idx - 1), then
            # add 1 per dimension to get an exclusive stop index — avoids
            # unravel_index failing when stop_idx == size.
            stop_nd_idx = np.unravel_index(stop_idx - 1, container.shape)
            stop_nd_idx = tuple([x + 1 for x in stop_nd_idx])
            yield start_nd_idx, stop_nd_idx, f(container, start_nd_idx,
                                               stop_nd_idx)
            start_idx = stop_idx
def GenerateValueAndAssign(var_blob, start_nd_idx, stop_nd_idx):
    """Generate one slice of values with the enclosing `initializer` and
    assign it into `var_blob` (an `oneflow.Tensor` or `EagerBlobTrait`)
    over [start_nd_idx, stop_nd_idx), using the enclosing
    `scope_symbol_id`."""
    np_dtype = np.dtype(
        dtype_util.convert_oneflow_dtype_to_numpy_dtype(var_blob.dtype))
    slice_shape = np.array(stop_nd_idx) - np.array(start_nd_idx)
    vals = np.array(initializer(_ElemCnt(slice_shape))).astype(
        np_dtype).reshape(slice_shape)
    # Tensors and eager blobs expose their blob object differently.
    if isinstance(var_blob, oneflow.Tensor):
        var_blob_object = var_blob._blob_object
    else:
        assert isinstance(var_blob, EagerBlobTrait)
        var_blob_object = var_blob.blob_object
    slice_value_blob = _GetCpu0VariableBlobFromNumpy(vals, var_blob.dtype)
    _LogicalSliceAssign(
        var_blob_object,
        slice_value_blob.blob_object,
        start_nd_idx,
        stop_nd_idx,
        scope_symbol_id,
    )
def __init__(
    self,
    var_dir: str,
    dtype: Optional[dtype_util.dtype] = None,
    shape: Optional[Sequence[int]] = None,
):
    """Open a file-backed variable stored under `var_dir`.

    Metadata (shape/dtype) is taken from the on-disk meta-info file when
    present; otherwise both `shape` and `dtype` must be supplied together,
    or both omitted (leaving the blob without metadata).

    Raises:
        RuntimeError: if exactly one of `shape`/`dtype` is given.
    """
    data_path = os.path.join(var_dir, DATA_FILENAME)
    assert os.path.isfile(data_path)
    self.var_dir_ = var_dir
    meta_info_path = os.path.join(self.var_dir_, META_INFO_FILENAME)
    if os.path.exists(meta_info_path):
        meta_info = variable_meta_info_pb.VariableMetaInfo()
        with open(meta_info_path) as f:
            text_format.Parse(f.read(), meta_info)
        self.has_meta_info_ = True
    else:
        self.has_meta_info_ = False
    if self.has_meta_info_:
        # On-disk metadata wins; callers must not also pass shape/dtype.
        assert dtype is None and shape is None
        self.shape_ = tuple(meta_info.shape.dim)
        self.dtype_ = dtype_util.convert_proto_dtype_to_oneflow_dtype(
            meta_info.data_type)
    else:
        if shape is not None and dtype is not None:
            self.shape_ = shape
            self.dtype_ = dtype
            self.has_meta_info_ = True
        elif shape is not None or dtype is not None:
            raise RuntimeError(
                "both or neither of shape and dtype should be None")
        else:
            # No metadata at all: shape_/dtype_ stay unset.
            pass
    if self.has_meta_info_:
        # Sanity-check that the data file size matches shape * itemsize.
        itemsize = np.dtype(
            dtype_util.convert_oneflow_dtype_to_numpy_dtype(
                self.dtype_)).itemsize
        # NOTE(review): uses `self.shape` here but assigns `self.shape_`
        # above — presumably a `shape` property exists on this class
        # (other code in this file reads `container.shape`); confirm.
        assert os.path.getsize(data_path) == np.prod(
            self.shape).item() * itemsize
def generate_values_by_initializer(initializer, shape, dtype):
    """Materialize a `shape`-shaped array of OneFlow `dtype` values
    produced by `initializer` (called with the total element count)."""
    np_dtype = np.dtype(dtype_util.convert_oneflow_dtype_to_numpy_dtype(dtype))
    total = _ElemCnt(shape)
    flat = np.array(initializer(total))
    return flat.astype(np_dtype).reshape(shape)