def copy_array(dst, src):
    """Copy the contents of ``src`` into ``dst`` and return ``dst``.

    Uses a plain NumPy copy when both arrays are host-accessible;
    otherwise delegates to the Bifrost C library's array copy.
    """
    dst_arr = asarray(dst)
    src_arr = asarray(src)
    host_to_host = (space_accessible(dst_arr.bf.space, ['system']) and
                    space_accessible(src_arr.bf.space, ['system']))
    if host_to_host:
        # Both buffers are visible from the host, so NumPy can do the copy
        np.copyto(dst_arr, src_arr)
    else:
        # At least one side is device-resident; use the library copy
        _check(_bf.bfArrayCopy(dst_arr.as_BFarray(),
                               src_arr.as_BFarray()))
        if dst_arr.bf.space != src_arr.bf.space:
            # TODO: Decide where/when these need to be called
            device.stream_synchronize()
    return dst
def __init__(self, irings, name=None, type_=None, **kwargs):
    """Register this block with the default pipeline and validate its input rings.

    Args:
        irings: Input rings (Block instances are accepted in place of rings).
        name: Optional block name; defaults to "<type>_<instance count>".
        type_: Optional type label; defaults to the class name.
        **kwargs: Forwarded to the superclass constructor.
    """
    self.type = type_ or self.__class__.__name__
    # Name derives from the per-type instance count, which is then bumped
    self.name = name or '%s_%i' % (self.type, Block.instance_counts[self.type])
    Block.instance_counts[self.type] += 1
    super(Block, self).__init__(**kwargs)
    self.pipeline = get_default_pipeline()
    self.pipeline.blocks.append(self)
    # Allow Block instances to be passed in place of rings
    self.irings = [get_ring(ring) for ring in irings]
    allowed_spaces = self._define_valid_input_spaces()
    for idx, (ring, spaces) in enumerate(zip(self.irings, allowed_spaces)):
        if memory.space_accessible(ring.space, spaces):
            continue
        raise ValueError(
            "Block %s input %i's space must be accessible from one of: %s"
            % (self.name, idx, str(spaces)))
    self.orings = []  # Update this in subclass constructors
    self.shutdown_event = threading.Event()
    self.bind_proclog = ProcLog(self.name + "/bind")
    self.in_proclog = ProcLog(self.name + "/in")
    ring_info = {'nring': len(self.irings)}
    for idx, ring in enumerate(self.irings):
        ring_info['ring%i' % idx] = ring.name
    self.in_proclog.update(ring_info)
    # Capture where this block was constructed, for later debugging
    self.init_trace = ''.join(traceback.format_stack()[:-1])
def as_cupy(self, *args, **kwargs):
    """Return this array as a CuPy ndarray.

    Zero-copy when the data already lives in CUDA-accessible memory;
    otherwise the data is round-tripped through the host.
    """
    import cupy as cp
    if not space_accessible(self.bf.space, ['cuda']):
        # Not device-resident: copy to host first, then upload
        return cp.asarray(np.array(self))
    # Wrap the existing device allocation without copying; passing ``self``
    # as the owner keeps this array alive as long as the view exists
    mem = cp.cuda.UnownedMemory(self.ctypes.data, self.data.nbytes, self)
    ptr = cp.cuda.MemoryPointer(mem, 0)
    return cp.ndarray(self.shape, dtype=self.dtype,
                      memptr=ptr, strides=self.strides)
def copy(self, space=None, order='C'):
    """Return a copy of this array, optionally in a different memory space.

    Args:
        space: Target memory space; defaults to this array's current space.
        order: Memory layout of the result; only 'C' is supported.

    Returns:
        A C-contiguous copy of the array in the requested space.

    Raises:
        NotImplementedError: If ``order`` is anything other than 'C'.
    """
    if order != 'C':
        raise NotImplementedError('Only order="C" is supported')
    if space is None:
        space = self.bf.space
    if not self.flags['C_CONTIGUOUS']:
        # Deal with arrays that need to have their layouts changed
        # TODO: Is there a better way to handle this?
        if space_accessible(self.bf.space, ['system']):
            ## For arrays that can be accessed from the system space, use
            ## numpy.ndarray.copy() to do the heavy lifting
            if space == 'cuda_managed':
                ## TODO: Decide where/when these need to be called
                device.stream_synchronize()
            ## This actually makes two copies and throws one away
            temp = ndarray(shape=self.shape, dtype=self.dtype, space=self.bf.space)
            temp[...] = np.array(self).copy()
            if self.bf.space != space:
                return ndarray(temp, space=space)
            return temp
        else:
            ## For arrays that can be access from CUDA, use bifrost.transpose
            ## to do the heavy lifting
            ### Figure out the correct axis order for C
            # Sorting strides descending yields the axis permutation that
            # makes the data C-ordered
            permute = np.argsort(self.strides)[::-1]
            c_shape = [self.shape[p] for p in permute]
            ### Make a BFarray wrapper for self so we can reset shape/strides
            ### to what they should be for a C ordered array
            self_corder = self.as_BFarray()
            shape_type = ctypes.c_long*_bf.BF_MAX_DIMS
            self_corder.shape = shape_type(*c_shape)
            self_corder.strides = shape_type(*[self.strides[p] for p in permute])
            ### Make a temporary array with the right shape that will be C ordered
            temp = ndarray(shape=self.shape, dtype=self.dtype, space=self.bf.space)
            ### Run the transpose using the BFarray wrapper and the temporary array
            array_type = ctypes.c_int * self.ndim
            axes_array = array_type(*permute)
            _check(_bf.bfTranspose(self_corder, temp.as_BFarray(), axes_array))
            if self.bf.space != space:
                return ndarray(temp, space=space)
            return temp
    # Note: This makes an actual copy as long as space is not None
    return ndarray(self, space=space)
def on_data(self, ispan, ospan):
    """Write the axis-permuted input span data into the output span."""
    # TODO: bf.memory.transpose should support system space too
    on_device = space_accessible(self.space, ['cuda'])
    if on_device:
        bf_transpose(ospan.data, ispan.data, self.axes)
        return
    ospan.data[...] = np.transpose(ispan.data, self.axes)
def _system_accessible_copy(self):
    """Return ``self`` if already host-accessible, else a copy in system memory."""
    in_system = space_accessible(self.bf.space, ['system'])
    return self if in_system else self.copy(space='system')