def init_reductions():
    """Register reducers with ForkingPickler so CUDA events, storages,
    tensors and autograd wrappers can be pickled across processes."""
    # CUDA events need a dedicated reducer; everything else shares one of
    # the generic reducers below.
    ForkingPickler.register(torch.cuda.Event, reduce_event)

    for storage_cls in torch._storage_classes:
        ForkingPickler.register(storage_cls, reduce_storage)

    for tensor_cls in torch._tensor_classes:
        ForkingPickler.register(tensor_cls, reduce_tensor)

    # Both autograd wrapper types go through the same variable reducer.
    for wrapper_cls in (Variable, Parameter):
        ForkingPickler.register(wrapper_cls, reduce_variable)
def init_reductions():
    """Hook torch types into ForkingPickler for multiprocessing support."""
    ForkingPickler.register(torch.cuda.Event, reduce_event)

    for storage_type in torch._storage_classes:
        ForkingPickler.register(storage_type, reduce_storage)

    for tensor_type in torch._tensor_classes:
        ForkingPickler.register(tensor_type, reduce_tensor)

    # torch.Tensor and Parameter are registered explicitly on top of the
    # class list. TODO: Maybe this should be in tensor_classes? :)
    for extra_type in (torch.Tensor, torch.nn.parameter.Parameter):
        ForkingPickler.register(extra_type, reduce_tensor)
def init_reductions():
    """Register reducers for Paddle tensor types, when supported."""
    # Bail out early on platforms where shared-memory reduction is not
    # available.
    if not _supported_check():
        return

    # The eager/static tensor flavours all share reduce_tensor;
    # LoDTensor has a reducer of its own.
    for tensor_cls in (
            paddle.Tensor,
            paddle.fluid.core.VarBase,
            paddle.fluid.framework.ParamBase,
    ):
        ForkingPickler.register(tensor_cls, reduce_tensor)
    ForkingPickler.register(paddle.fluid.core.LoDTensor, reduce_lodtensor)
def init_reductions():
    """Wire ForkingPickler up with reducers for CUDA events, storages and
    tensors so they can be shared between processes."""
    # Build the full (class, reducer) list first, then register in order:
    # event, storages, tensors.
    registrations = [(torch.cuda.Event, reduce_event)]
    registrations += [(s, reduce_storage) for s in torch._storage_classes]
    registrations += [(t, reduce_tensor) for t in torch._tensor_classes]
    for cls, reducer in registrations:
        ForkingPickler.register(cls, reducer)
return nd.NDArray(  # NOTE(review): tail of rebuild_ndarray -- its `def` line lies outside this chunk
    nd.ndarray._new_from_shared_mem(pid, fd, shape, dtype))


def reduce_ndarray(data):
    """Reduce ndarray to shared memory handle"""
    # Move the array into the shared-memory CPU context so other
    # processes can map the same underlying buffer.
    # keep a local ref before duplicating fd
    data = data.as_in_context(context.Context('cpu_shared', 0))
    pid, fd, shape, dtype = data._to_shared_mem()
    # Python 2 and 3 expose different fd-duplication helpers in
    # multiprocessing.reduction.
    if sys.version_info[0] == 2:
        fd = multiprocessing.reduction.reduce_handle(fd)
    else:
        fd = multiprocessing.reduction.DupFd(fd)
    return rebuild_ndarray, (pid, fd, shape, dtype)


ForkingPickler.register(nd.NDArray, reduce_ndarray)


class ConnectionWrapper(object):
    """Connection wrapper for multiprocessing that supports sending
    NDArray via shared memory."""

    def __init__(self, conn):
        # Wrapped raw multiprocessing connection.
        self._conn = conn

    def send(self, obj):
        """Send object"""
        # Pickle with ForkingPickler so the NDArray reducer above is used,
        # then ship the raw bytes over the underlying connection.
        buf = io.BytesIO()
        ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(obj)
        self.send_bytes(buf.getvalue())

    def recv(self):  # NOTE(review): body truncated -- continues past this chunk
else:  # NOTE(review): tail of rebuild_ndarray -- the matching `if` lies outside this chunk
    fd = fd.detach()
return nd.NDArray(nd.ndarray._new_from_shared_mem(pid, fd, shape, dtype))


def reduce_ndarray(data):
    """Reduce ndarray to shared memory handle"""
    # Move the array into the shared-memory CPU context so other
    # processes can map the same underlying buffer.
    # keep a local ref before duplicating fd
    data = data.as_in_context(context.Context('cpu_shared', 0))
    pid, fd, shape, dtype = data._to_shared_mem()
    # Python 2 and 3 expose different fd-duplication helpers in
    # multiprocessing.reduction.
    if sys.version_info[0] == 2:
        fd = multiprocessing.reduction.reduce_handle(fd)
    else:
        fd = multiprocessing.reduction.DupFd(fd)
    return rebuild_ndarray, (pid, fd, shape, dtype)


ForkingPickler.register(nd.NDArray, reduce_ndarray)


class ConnectionWrapper(object):
    """Connection wrapper for multiprocessing that supports sending
    NDArray via shared memory."""

    def __init__(self, conn):
        # Wrapped raw multiprocessing connection.
        self._conn = conn

    def send(self, obj):
        """Send object"""
        # Pickle with ForkingPickler so the NDArray reducer above is used,
        # then ship the raw bytes over the underlying connection.
        buf = io.BytesIO()
        ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(obj)
        self.send_bytes(buf.getvalue())
with contextlib.suppress(BaseException):
    # Best-effort registration: if anything in this block fails, shared
    # pickling support is silently skipped.
    # NOTE(review): suppressing BaseException also swallows
    # KeyboardInterrupt/SystemExit raised inside the block -- confirm
    # this is intended. Nesting of the defs under the `with` was
    # reconstructed from collapsed source -- verify against upstream.
    from multiprocessing.reduction import ForkingPickler

    def _rebuild_meta(cls, storage, metadata):
        # Recreate a tensor of type `cls` from its storage plus the
        # (offset, size, stride, meta, applied_operations) tuple captured
        # by reduce_meta_tensor below.
        storage_offset, size, stride, meta_obj, applied_operations = metadata
        t = cls([], meta=meta_obj, applied_operations=applied_operations, dtype=storage.dtype, device=storage.device)
        # Newer torch versions require the untyped storage for set_().
        t.set_(storage._untyped() if hasattr(storage, "_untyped") else storage, storage_offset, size, stride)
        return t

    def reduce_meta_tensor(meta_tensor):
        # Pickle hook: split a MetaTensor into (type, storage, metadata)
        # so the storage can be shared across processes.
        storage = meta_tensor.storage()
        # Only CPU storages are supported for cross-process sharing here.
        if storage.is_cuda:
            raise NotImplementedError( "sharing CUDA metatensor across processes not implemented")
        metadata = (
            meta_tensor.storage_offset(),
            meta_tensor.size(),
            meta_tensor.stride(),
            meta_tensor.meta,
            meta_tensor.applied_operations,
        )
        return _rebuild_meta, (type(meta_tensor), storage, metadata)

    ForkingPickler.register(MetaTensor, reduce_meta_tensor)
def init_reductions():
    """Register multiprocessing reducers for events, storages and tensors."""
    ForkingPickler.register(torch.cuda.Event, reduce_event)

    # The untyped storage class is reduced directly; every typed child
    # class goes through the typed-storage-child reducer instead.
    for storage_cls in torch._storage_classes:
        if storage_cls.__name__ == '_UntypedStorage':
            reducer = reduce_storage
        else:
            reducer = reduce_typed_storage_child
        ForkingPickler.register(storage_cls, reducer)
    ForkingPickler.register(torch.storage._TypedStorage, reduce_typed_storage)

    for tensor_cls in torch._tensor_classes:
        ForkingPickler.register(tensor_cls, reduce_tensor)
    # torch.Tensor and Parameter are registered explicitly on top of the
    # class list. TODO: Maybe this should be in tensor_classes? :)
    for extra_cls in (torch.Tensor, torch.nn.parameter.Parameter):
        ForkingPickler.register(extra_cls, reduce_tensor)
def init_reductions():
    """Map oneflow tensor and parameter types to their pickling reducers."""
    # Both the Python-level class and its internal C++ counterpart must
    # be registered for each kind.
    for tensor_cls in (Tensor, flow._oneflow_internal.Tensor):
        ForkingPickler.register(tensor_cls, reduce_tensor)
    for param_cls in (Parameter, flow._oneflow_internal.nn.Parameter):
        ForkingPickler.register(param_cls, reduce_parameter)
def _forking_reduce(graph_index):
    # Because F.from_dgl_nd(F.to_dgl_nd(x)) loses the information of shared memory
    # file descriptor (because DLPack does not keep it), without caching the tensors
    # PyTorch will allocate one shared memory region for every single worker.
    # The downside is that if a graph_index is shared by forking and new formats are created
    # afterwards, then sharing it again will not bring together the new formats. This case
    # should be rare though because (1) DataLoader will create all the formats if num_workers > 0
    # anyway, and (2) we require the users to explicitly create all formats before calling
    # mp.spawn().
    # Fast path: reuse the cached pickle state if this index was shared before.
    if hasattr(graph_index, '_forking_pk_state'):
        return _forking_rebuild, (graph_index._forking_pk_state, )
    states = _CAPI_DGLHeteroForkingPickle(graph_index)
    arrays = [F.from_dgl_nd(arr) for arr in states.arrays]
    # Similar to what being mentioned in HeteroGraphIndex.__getstate__, we need to save
    # the tensors as an attribute of the original graph index object. Otherwise
    # PyTorch will throw weird errors like bad value(s) in fds_to_keep or unable to
    # resize file.
    graph_index._forking_pk_state = (states.version, states.meta, arrays)
    return _forking_rebuild, (graph_index._forking_pk_state, )


if not (F.get_preferred_backend() == 'mxnet' and sys.version_info.minor <= 6):
    # Python 3.6 MXNet crashes with the following statement; remove until we no longer support
    # 3.6 (which is EOL anyway).
    # NOTE(review): the guard compares only sys.version_info.minor -- verify
    # against upstream that the major version need not be checked.
    from multiprocessing.reduction import ForkingPickler
    ForkingPickler.register(HeteroGraphIndex, _forking_reduce)

_init_api("dgl.heterograph_index")
def init_reduction():
    """Register the shared-memory reducer for SharedNDArray with ForkingPickler."""
    # A single registration suffices; the reducer handles the handle transfer.
    register = ForkingPickler.register
    register(SharedNDArray, reduce_shared_ndarray)