def __init__(self, ring, header, gulp_nframe, buf_nframe):
    """Begin a new write sequence on `ring` described by `header`.

    ring:        ring object; its `.obj` handle is passed to the backend
    header:      dict providing at least 'name', 'time_tag' and
                 '_tensor' (with a 'dtype' entry)
    gulp_nframe: frame count used for the first span passed to resize()
    buf_nframe:  frame count used for the second span passed to resize()

    The header is serialised with json.dumps and handed to
    bfRingSequenceBegin. Note that header['_tensor']['dtype'] is
    overwritten in place with its str() form, so the caller's dict is
    modified.
    """
    SequenceBase.__init__(self, ring)
    self._header = header
    # Stringify the dtype so that DataType instances may be passed
    # instead of string types
    tensor_hdr = header['_tensor']
    tensor_hdr['dtype'] = str(tensor_hdr['dtype'])
    serialized = json.dumps(header)
    serialized_len = len(serialized)
    info = self.tensor
    frame_nbyte = info['frame_nbyte']
    nringlet = info['nringlet']
    # **TODO: Consider moving this into bfRingSequenceBegin
    self.ring.resize(gulp_nframe * frame_nbyte,
                     buf_nframe * frame_nbyte,
                     nringlet)
    # TODO: How to allow time_tag to be optional? Probably need to plumb
    #       support through to backend.
    self.obj = _bf.BFwsequence()
    try:
        name_arg, header_arg = header['name'].encode(), serialized.encode()
    except AttributeError:
        # Python2 catch
        name_arg, header_arg = header['name'], serialized
    status = _bf.bfRingSequenceBegin(self.obj,
                                     ring.obj,
                                     name_arg,
                                     header['time_tag'],
                                     serialized_len,
                                     header_arg,
                                     nringlet,
                                     0)  # offset_from_head
    _check(status)
def __init__(self, ring, name="", time_tag=-1, header="", nringlet=1):
    """Begin a write sequence on `ring` via bfRingSequenceBegin.

    ring:     ring object; its `.obj` handle is passed to the backend
    name:     sequence name (converted with str() and then encoded)
    time_tag: passed through unchanged (default -1)
    header:   str, bytes-like object, or numpy array holding the header
    nringlet: passed through unchanged (default 1)

    The header size handed to the backend is the byte length of the data
    that is actually passed: the encoded length for str headers and
    `nbytes` for numpy arrays.
    """
    SequenceBase.__init__(self, ring)
    # TODO: Allow header to be a string, buffer, or numpy array
    if isinstance(header, np.ndarray):
        # Measure before the array is replaced by its raw address
        header_size = header.nbytes
        header = header.ctypes.data
    else:
        if isinstance(header, str):
            try:
                header = header.encode()
            except AttributeError:
                # Python2 catch
                pass
        # Measured after encoding so multi-byte characters are counted
        # as bytes rather than as characters
        header_size = len(header)
    name = str(name)
    offset_from_head = 0
    self.obj = _bf.BFwsequence()
    try:
        name = name.encode()
    except AttributeError:
        # Python2 catch
        pass
    _check(_bf.bfRingSequenceBegin(self.obj,
                                   ring.obj,
                                   name,
                                   time_tag,
                                   header_size,
                                   header,
                                   nringlet,
                                   offset_from_head))
def init(self, coeffs, decim=1, space='cuda'):
    """Initialise this object through bfFirInit.

    coeffs: coefficients; converted with asarray(...).as_BFarray()
    decim:  passed through to bfFirInit (default 1)
    space:  memory space name, translated with _string2space
            (default 'cuda')
    """
    bf_space = _string2space(space)
    bf_coeffs = asarray(coeffs).as_BFarray()
    # The two trailing arguments are always passed as 0 and None
    status = _bf.bfFirInit(self.obj, bf_coeffs, decim, bf_space, 0, None)
    _check(status)
def set_devices_no_spin_cpu():
    """Tell all GPU devices not to spin the CPU while synchronizing.

    Setting this flag is useful for reducing CPU load in GPU pipelines.

    This function must be called _before_ any GPU devices are
    initialized (i.e., at the start of the process).
    """
    status = _bf.bfDevicesSetNoSpinCPU()
    _check(status)
def execute(self, idata, odata):
    """Call bfFirExecute with `idata` and `odata`, then return `odata`.

    Both arguments are converted with asarray(...).as_BFarray() before
    being handed to the backend.
    """
    # TODO: Work out how to integrate CUDA stream
    bf_in = asarray(idata).as_BFarray()
    bf_out = asarray(odata).as_BFarray()
    status = _bf.bfFirExecute(self.obj, bf_in, bf_out)
    _check(status)
    return odata
def matmul(self, alpha, a, b, beta, c):
    """Computes:
          c = alpha*a.b + beta*c
        or if b is None:
          c = alpha*a.a^H + beta*c
        or if a is None:
          c = alpha*b^H.b + beta*c
        where '.' is matrix product and '^H' is Hermitian transpose.

    Multi-dimensional semantics are the same as numpy.matmul:
        The last two dims represent the matrix, and all other dims are
        used as batch dims to be matched or broadcast between a and b.

    alpha defaults to 1 and beta to 0 when given as None; both are
    converted to float. Returns c.
    """
    def _to_bf(operand):
        # A missing operand is forwarded to the backend as None
        if operand is None:
            return None
        return asarray(operand).as_BFarray()

    beta = 0. if beta is None else float(beta)
    alpha = 1. if alpha is None else float(alpha)
    a_array = _to_bf(a)
    b_array = _to_bf(b)
    c_array = asarray(c).as_BFarray()
    status = _bf.bfLinAlgMatMul(self.obj,
                                alpha, a_array, b_array,
                                beta, c_array)
    _check(status)
    return c
def reduce(idata, odata, op='sum'):
    """Call bfReduce on `idata` and `odata` with the operation named `op`.

    op must be a key of REDUCE_MAP (default 'sum'); otherwise ValueError
    is raised. Both arrays are converted with asarray(...).as_BFarray().
    """
    if op in REDUCE_MAP:
        bf_op = REDUCE_MAP[op]
    else:
        raise ValueError("Invalid reduce op: " + str(op))
    bf_in = asarray(idata).as_BFarray()
    bf_out = asarray(odata).as_BFarray()
    _check(_bf.bfReduce(bf_in, bf_out, bf_op))
def transpose(dst, src, axes=None):
    """Call bfTranspose on `src` and `dst` with the given axis order.

    axes: iterable of ints; when None, the axes of `dst` in reverse
          order are used. It is packed into a C int array of length
          src.ndim.
    """
    if axes is None:
        # Default: every axis of dst, last to first
        axes = range(len(dst.shape) - 1, -1, -1)
    dst_bf = asarray(dst).as_BFarray()
    src_bf = asarray(src).as_BFarray()
    axes_array = (ctypes.c_int * src.ndim)(*axes)
    status = _bf.bfTranspose(src_bf, dst_bf, axes_array)
    _check(status)
def memset2D(dst, val=0):
    """Call bfMemset2D on the 2-D array `dst` with value `val`.

    The row pitch passed to the backend is dst.strides[0]; the width is
    given in bytes (columns * itemsize) and the height in rows.
    Asserts that `dst` has exactly two dimensions.
    """
    assert (len(dst.shape) == 2)
    bf_space = _string2space(_get_space(dst))
    nrow, ncol = dst.shape
    row_nbyte = ncol * dst.dtype.itemsize
    status = _bf.bfMemset2D(dst.ctypes.data,
                            dst.strides[0],
                            bf_space,
                            val,
                            row_nbyte,
                            nrow)
    _check(status)
def resize(self, contiguous_span, total_span=None, nringlet=1, buffer_factor=4):
    """Call bfRingResize for this ring.

    contiguous_span: passed through to the backend
    total_span:      passed through; when None it is computed as
                     contiguous_span * buffer_factor
    nringlet:        passed through (default 1)
    buffer_factor:   multiplier used only when total_span is None
                     (default 4)
    """
    if total_span is not None:
        span = total_span
    else:
        span = contiguous_span * buffer_factor
    status = _bf.bfRingResize(self.obj, contiguous_span, span, nringlet)
    _check(status)
def execute_workspace(self, iarray, oarray, workspace_ptr, workspace_size, inverse=False):
    """Call bfFftExecute with a caller-supplied workspace; return `oarray`.

    iarray, oarray: converted with asarray(...).as_BFarray()
    workspace_ptr, workspace_size: passed through to the backend
    inverse: passed through to the backend (default False)
    """
    bf_in = asarray(iarray).as_BFarray()
    bf_out = asarray(oarray).as_BFarray()
    status = _bf.bfFftExecute(self.obj,
                              bf_in,
                              bf_out,
                              inverse,
                              workspace_ptr,
                              workspace_size)
    _check(status)
    return oarray
def set_openmp_cores(cores):
    """Call bfAffinitySetOpenMPCores with the entries of `cores`.

    cores: sized sequence; its length and an 'int' array built from it
           with _array() are handed to the backend.
    """
    # PYCLIBRARY ISSUE
    # TODO: Would be really nice to be able to directly pass
    #         a list here instead of needing to specify _array+type.
    #         Should be able to do it safely for any const* argument
    #       Note that the base type of the pointer type could be
    #         derived via a reverse lookup table.
    #         E.g., Inverse of POINTER(c_int)-->LP_c_int
    ncore = len(cores)
    core_array = _array(cores, 'int')
    _check(_bf.bfAffinitySetOpenMPCores(ncore, core_array))
def __init__(self, ring, sequence, nframe, nonblocking=False):
    """Reserve a writeable span of `nframe` frames via bfRingSpanReserve.

    The byte count requested is nframe times the 'frame_nbyte' entry of
    the sequence's tensor info. `nonblocking` is passed through to the
    backend. `commit_nframe` starts at 0.
    """
    SpanBase.__init__(self, ring, sequence, writeable=True)
    frame_nbyte = self._sequence.tensor['frame_nbyte']
    span_nbyte = nframe * frame_nbyte
    self.obj = _bf.BFwspan()
    status = _bf.bfRingSpanReserve(self.obj, ring.obj, span_nbyte, nonblocking)
    _check(status)
    self._set_base_obj(self.obj)
    # Note: We default to 0 instead of nframe so that we don't accidentally
    #         commit bogus data if a block throws an exception.
    self.commit_nframe = 0
def execute(self, idata, odata, negative_delays=False):
    """Call bfFdmtExecute without an explicit workspace; return `odata`.

    idata, odata: converted with asarray(...).as_BFarray()
    negative_delays: passed through to the backend (default False)

    Both the workspace pointer and workspace size arguments are passed
    as None.
    """
    # TODO: Work out how to integrate CUDA stream
    bf_in = asarray(idata).as_BFarray()
    bf_out = asarray(odata).as_BFarray()
    status = _bf.bfFdmtExecute(self.obj,
                               bf_in,
                               bf_out,
                               negative_delays,
                               None,   # workspace pointer
                               None)   # workspace size
    _check(status)
    return odata
def address(self):
    """Return the string produced by bfAddressGetString for this object.

    The backend writes into a 128-byte ctypes string buffer; the buffer's
    value is decoded when it supports .decode() and returned as-is
    otherwise.
    """
    capacity = 128
    raw = ctypes.create_string_buffer(capacity)
    _check(_bf.bfAddressGetString(self.obj, capacity, raw))
    try:
        return raw.value.decode()
    except AttributeError:
        # Python2 catch
        return raw.value
def memcpy(dst, src):
    """Copy dst.nbytes bytes from `src` to `dst` via bfMemcpy; return `dst`.

    Asserts that `dst` is C-contiguous and that both arrays have the
    same shape. The memory space of each array is looked up with
    _get_space and translated with _string2space.
    """
    assert (dst.flags['C_CONTIGUOUS'])
    assert (src.shape == dst.shape)
    to_space = _string2space(_get_space(dst))
    from_space = _string2space(_get_space(src))
    nbyte = dst.nbytes
    status = _bf.bfMemcpy(dst.ctypes.data, to_space,
                          src.ctypes.data, from_space,
                          nbyte)
    _check(status)
    return dst
def memcpy2D(dst, src):
    """Copy the 2-D array `src` into `dst` via bfMemcpy2D.

    Asserts that `dst` is two-dimensional and that both arrays have the
    same shape. Each array's strides[0] is passed as its row pitch; the
    width is given in bytes (columns * dst itemsize) and the height in
    rows.
    """
    assert (len(dst.shape) == 2)
    assert (src.shape == dst.shape)
    to_space = _string2space(_get_space(dst))
    from_space = _string2space(_get_space(src))
    nrow, ncol = dst.shape
    row_nbyte = ncol * dst.dtype.itemsize
    status = _bf.bfMemcpy2D(dst.ctypes.data, dst.strides[0], to_space,
                            src.ctypes.data, src.strides[0], from_space,
                            row_nbyte, nrow)
    _check(status)
def __init__(self, sequence, frame_offset, nframe):
    """Acquire a read-only span from `sequence` via bfRingSpanAcquire.

    frame_offset and nframe are given in frames and converted to bytes
    with the 'frame_nbyte' entry of the sequence's tensor info.

    Afterwards:
      nframe_skipped         = min(self.frame_offset - frame_offset, nframe)
      requested_frame_offset = the frame_offset that was asked for
    """
    SpanBase.__init__(self, sequence.ring, sequence, writeable=False)
    frame_nbyte = sequence.tensor['frame_nbyte']
    self.obj = _bf.BFrspan()
    byte_offset = frame_offset * frame_nbyte
    byte_count = nframe * frame_nbyte
    status = _bf.bfRingSpanAcquire(self.obj, sequence.obj,
                                   byte_offset, byte_count)
    _check(status)
    self._set_base_obj(self.obj)
    # Gap between the offset reported by the span and the one requested,
    # capped at the number of frames requested
    gap = self.frame_offset - frame_offset
    self.nframe_skipped = min(gap, nframe)
    self.requested_frame_offset = frame_offset
def copy_array(dst, src):
    """Copy the contents of `src` into `dst` and return `dst`.

    When both arrays live in spaces accessible from 'system', the copy
    is done with np.copyto. Otherwise bfArrayCopy is used, followed by a
    stream synchronize when the two spaces differ.
    """
    dst_bf = asarray(dst)
    src_bf = asarray(src)
    both_on_host = (space_accessible(dst_bf.bf.space, ['system']) and
                    space_accessible(src_bf.bf.space, ['system']))
    if not both_on_host:
        status = _bf.bfArrayCopy(dst_bf.as_BFarray(), src_bf.as_BFarray())
        _check(status)
        if dst_bf.bf.space != src_bf.bf.space:
            # TODO: Decide where/when these need to be called
            device.stream_synchronize()
    else:
        np.copyto(dst_bf, src_bf)
    return dst
def on_data(self, ispan):
    """Copy the bytes of ispan.data into the next block from
    self.hdu.data_block, then close that block.

    Both source and destination are treated as BF_SPACE_SYSTEM memory.
    """
    # TODO: Make this work in CUDA space
    block = next(self.hdu.data_block)
    nbyte = ispan.data.nbytes
    status = _bf.bfMemcpy(block.ptr, _bf.BF_SPACE_SYSTEM,
                          ispan.data.ctypes.data, _bf.BF_SPACE_SYSTEM,
                          nbyte)
    _check(status)
    # Alternative kept for reference:
    #   block.data[:] = ispan.data.view('u8')
    block.close()
def execute_workspace(self, idata, odata, workspace_ptr, workspace_size, negative_delays=False):
    """Call bfFdmtExecute with a caller-supplied workspace; return `odata`.

    idata, odata:    converted with asarray(...).as_BFarray()
    workspace_ptr:   passed through to the backend
    workspace_size:  wrapped in _bf.BFsize before being passed
    negative_delays: passed through to the backend (default False)
    """
    bf_size = _bf.BFsize(workspace_size)
    bf_in = asarray(idata).as_BFarray()
    bf_out = asarray(odata).as_BFarray()
    status = _bf.bfFdmtExecute(self.obj,
                               bf_in,
                               bf_out,
                               negative_delays,
                               workspace_ptr,
                               bf_size)
    _check(status)
    return odata
def __init__(self, ring, which='specific', name="", time_tag=None, other_obj=None, guarantee=True):
    """Open a sequence on `ring` for reading.

    ring:      ring object; its `.obj` handle is passed to the backend
    which:     selects the backend call that is made:
                 'specific' -> bfRingSequenceOpen (uses `name`)
                 'latest'   -> bfRingSequenceOpenLatest
                 'earliest' -> bfRingSequenceOpenEarliest
                 'at'       -> bfRingSequenceOpenAt (uses `time_tag`)
    name:      sequence name, only used when which == 'specific'
    time_tag:  only used when which == 'at'
    other_obj: currently unused (the 'next' mode that would take it is
               disabled)
    guarantee: passed through to the backend (default True)

    Raises ValueError for any other value of `which`.
    """
    SequenceBase.__init__(self, ring)
    self._ring = ring
    self.obj = _bf.BFrsequence()
    if which == 'specific':
        # Encode the name the same way the write-side constructors do
        # before handing it to the backend
        try:
            name = name.encode()
        except AttributeError:
            # Python2 catch (or name is already bytes)
            pass
        _check(_bf.bfRingSequenceOpen(self.obj, ring.obj, name, guarantee))
    elif which == 'latest':
        _check(_bf.bfRingSequenceOpenLatest(self.obj, ring.obj, guarantee))
    elif which == 'earliest':
        _check(_bf.bfRingSequenceOpenEarliest(self.obj, ring.obj, guarantee))
    elif which == 'at':
        _check(_bf.bfRingSequenceOpenAt(self.obj, ring.obj, time_tag, guarantee))
    # TODO: 'next' mode is disabled:
    #elif which == 'next':
    #    self._check( self.lib.bfRingSequenceOpenNext(pointer(self.obj), other_obj) )
    else:
        raise ValueError(
            "Invalid 'which' parameter; must be one of: "
            "'specific', 'latest', 'earliest', 'at'")
def __init__(self, ring, which='specific', name="", time_tag=None, other_obj=None, guarantee=True, header_transform=None):
    """Open a sequence on `ring` for reading.

    ring:             ring object; its `.obj` handle is passed to the
                      backend
    which:            selects the backend call that is made:
                        'specific' -> bfRingSequenceOpen (uses `name`)
                        'at'       -> bfRingSequenceOpenAt (uses
                                      `time_tag`, which must not be None)
                        'latest'   -> bfRingSequenceOpenLatest
                        'earliest' -> bfRingSequenceOpenEarliest
    name:             sequence name, only used when which == 'specific'
    time_tag:         only used when which == 'at'
    other_obj:        not used by this constructor
    guarantee:        passed through to the backend (default True)
    header_transform: stored on the instance as `header_transform`

    Raises ValueError for any other value of `which`.
    """
    SequenceBase.__init__(self, ring)
    self._ring = ring
    # A function for transforming the header before it's read
    self.header_transform = header_transform
    self.obj = _bf.BFrsequence()
    if which == 'specific':
        # Encode the name the same way the write-side constructors do
        # before handing it to the backend
        try:
            name = name.encode()
        except AttributeError:
            # Python2 catch (or name is already bytes)
            pass
        _check(_bf.bfRingSequenceOpen(self.obj, ring.obj, name, guarantee))
    elif which == 'at':
        assert (time_tag is not None)
        _check(_bf.bfRingSequenceOpenAt(self.obj, ring.obj, time_tag, guarantee))
    elif which == 'latest':
        _check(_bf.bfRingSequenceOpenLatest(self.obj, ring.obj, guarantee))
    elif which == 'earliest':
        _check(_bf.bfRingSequenceOpenEarliest(self.obj, ring.obj, guarantee))
    else:
        raise ValueError(
            "Invalid 'which' parameter; must be one of: "
            "'specific', 'at', 'latest', 'earliest'")
def update(self, contents):
    """Updates (replaces) the contents of the log

    contents: string or dict containing data to write to the log

    A dict is rendered as one 'key : value' line per item. Raises
    ValueError when contents is None.
    """
    if contents is None:
        raise ValueError("Contents cannot be None")
    if isinstance(contents, dict):
        rendered = []
        for key, value in contents.items():
            rendered.append('%s : %s' % (key, value))
        contents = '\n'.join(rendered)
    try:
        payload = contents.encode()
    except AttributeError:
        # Python2 catch
        payload = contents
    _check(_bf.bfProcLogUpdate(self.obj, payload))
def copy(self, space=None, order='C'):
    """Return a copy of this array, optionally placed in another space.

    space: target space; defaults to this array's own space
    order: only 'C' is accepted, anything else raises
           NotImplementedError

    C-contiguous arrays are copied directly by constructing a new
    ndarray. Other layouts are first rewritten into a temporary array in
    the array's own space (via numpy when that space is accessible from
    'system', via bfTranspose otherwise) and then moved to `space` if it
    differs.
    """
    if order != 'C':
        raise NotImplementedError('Only order="C" is supported')
    if space is None:
        space = self.bf.space
    if self.flags['C_CONTIGUOUS']:
        # Note: This makes an actual copy as long as space is not None
        return ndarray(self, space=space)

    # Deal with arrays that need to have their layouts changed
    # TODO: Is there a better way to handle this?
    own_space = self.bf.space
    if space_accessible(own_space, ['system']):
        ## For arrays that can be accessed from the system space, use
        ## numpy.ndarray.copy() to do the heavy lifting
        if space == 'cuda_managed':
            ## TODO: Decide where/when these need to be called
            device.stream_synchronize()
        ## This actually makes two copies and throws one away
        staged = ndarray(shape=self.shape, dtype=self.dtype, space=own_space)
        staged[...] = np.array(self).copy()
    else:
        ## For arrays that can be access from CUDA, use bifrost.transpose
        ## to do the heavy lifting
        ### Figure out the correct axis order for C
        axis_order = np.argsort(self.strides)[::-1]
        ### Make a BFarray wrapper for self so we can reset shape/strides
        ### to what they should be for a C ordered array
        view = self.as_BFarray()
        dims_type = ctypes.c_long * _bf.BF_MAX_DIMS
        view.shape = dims_type(*[self.shape[ax] for ax in axis_order])
        view.strides = dims_type(*[self.strides[ax] for ax in axis_order])
        ### Make a temporary array with the right shape that will be C ordered
        staged = ndarray(shape=self.shape, dtype=self.dtype, space=own_space)
        ### Run the transpose using the BFarray wrapper and the temporary array
        axes_array = (ctypes.c_int * self.ndim)(*axis_order)
        _check(_bf.bfTranspose(view, staged.as_BFarray(), axes_array))

    # Both paths finish the same way: move to the requested space only
    # when it differs from the array's own space
    if own_space != space:
        return ndarray(staged, space=space)
    return staged
def __init__(self, space='system', name=None, core=None):
    """Create a ring via bfRingCreate in the given memory space.

    space: memory space name, translated with _string2space
           (default 'system')
    name:  ring name; a fresh uuid4 string is used when None. The name
           is passed through _slugify and encoded before use.
    core:  when not None, passed to bfRingSetAffinity; a RuntimeError
           from that call is ignored.
    """
    ring_name = _slugify(str(uuid4()) if name is None else name)
    try:
        ring_name = ring_name.encode()
    except AttributeError:
        # Python2 catch
        pass
    bf_space = _string2space(space)
    BifrostObject.__init__(self,
                           _bf.bfRingCreate, _bf.bfRingDestroy,
                           ring_name, bf_space)
    if core is None:
        return
    try:
        _check(_bf.bfRingSetAffinity(self.obj, core))
    except RuntimeError:
        # Setting the affinity is best-effort; failure is ignored
        pass
def readinto(self, buf):
    """Fill `buf` with bytes from the current and following blocks.

    Data is copied with bfMemcpy from self.block (treated as
    BF_SPACE_SYSTEM memory) into `buf`, whose space is looked up with
    _get_space. Whenever the current block is used up it is closed and
    the next one is opened with self._open_next_block(); copying then
    continues from the new block until `buf` is full or
    _open_next_block() raises StopIteration.

    Returns the number of bytes written into `buf`.
    """
    dst_space = Space(_get_space(buf)).as_BFspace()
    nbyte = buf.nbytes
    byte0 = 0
    while True:
        # Recomputed at the top of every pass so that it reflects a
        # block opened at the end of the previous pass. (Computing it
        # before advancing to the next block always yields 0 and ends
        # the loop after a single copy.)
        nbyte_copy = min(nbyte - byte0, self.nbyte - self.byte0)
        if not nbyte_copy:
            break
        _check(_bf.bfMemcpy(buf.ctypes.data + byte0, dst_space,
                            self.block.ptr + self.byte0,
                            _bf.BF_SPACE_SYSTEM,
                            nbyte_copy))
        byte0 += nbyte_copy
        self.byte0 += nbyte_copy
        if self.nbyte - self.byte0 == 0:
            # Current block fully consumed: release it and move on
            self.block.close()
            try:
                self._open_next_block()
            except StopIteration:
                break
    return byte0
def __init__(self, space='system', name=None, owner=None, core=None):
    """Create a ring via bfRingCreate in the given memory space.

    space: memory space name (default 'system'); stored as self.space
           and translated with _string2space for the backend
    name:  ring name; when None, 'ring_<n>' is used with n taken from
           Ring.instance_count, which is then incremented. The name is
           passed through _slugify and encoded before use.
    owner: stored as self.owner
    core:  when not None, passed to bfRingSetAffinity; a RuntimeError
           from that call is ignored.
    """
    # If this is non-None, then the object is wrapping a base Ring instance
    self.base = None
    self.space = space
    if name is None:
        name = 'ring_%i' % Ring.instance_count
        Ring.instance_count += 1
    ring_name = _slugify(name)
    try:
        ring_name = ring_name.encode()
    except AttributeError:
        # Python2 catch
        pass
    BifrostObject.__init__(self,
                           _bf.bfRingCreate, _bf.bfRingDestroy,
                           ring_name, _string2space(self.space))
    if core is not None:
        try:
            status = _bf.bfRingSetAffinity(self.obj, core)
            _check(status)
        except RuntimeError:
            # Setting the affinity is best-effort; failure is ignored
            pass
    self.owner = owner
    self.header_transform = None
def quantize(src, dst, scale=1.):
    """Call bfQuantize on `src` and `dst` with `scale`; return `dst`.

    Both arrays are converted with asarray(...).as_BFarray() before the
    call. scale defaults to 1.
    """
    bf_src = asarray(src).as_BFarray()
    bf_dst = asarray(dst).as_BFarray()
    status = _bf.bfQuantize(bf_src, bf_dst, scale)
    _check(status)
    return dst
def stream_synchronize():
    """Call bfStreamSynchronize and check its return status."""
    status = _bf.bfStreamSynchronize()
    _check(status)