def test_2d_active(self):
    shape = self.shape2D
    known_data = np.random.normal(size=shape).astype(np.float32).view(np.complex64)
    idata = bf.ndarray(known_data, space='cuda_managed')
    odata = bf.empty_like(idata)
    coeffs = self.coeffs * 1.0
    coeffs.shape += (1,)
    coeffs = np.repeat(coeffs, idata.shape[1], axis=1)
    coeffs.shape = (coeffs.shape[0], idata.shape[1])
    coeffs = bf.ndarray(coeffs, space='cuda_managed')
    fir = Fir()
    fir.init(coeffs, 1)
    # Execute twice so that the filter state carries over between calls;
    # the reference below mirrors this by chaining lfilter's zi state.
    fir.execute(idata, odata)
    fir.execute(idata, odata)
    stream_synchronize()
    for i in range(known_data.shape[1]):
        zf = lfiltic(self.coeffs, 1.0, 0.0)
        known_result, zf = lfilter(self.coeffs, 1.0, known_data[:, i], zi=zf)
        known_result, zf = lfilter(self.coeffs, 1.0, known_data[:, i], zi=zf)
        compare(odata[:, i], known_result)
def run_test_r2c_dtype(self, shape, axes, dtype=np.float32, scale=1., misalign=0):
    known_data = np.random.normal(size=shape).astype(np.float32)
    known_data = (known_data * scale).astype(dtype)
    # Force misaligned data
    padded_shape = shape[:-1] + (shape[-1] + misalign,)
    known_data = np.resize(known_data, padded_shape)
    idata = bf.ndarray(known_data, space='cuda_managed')
    known_data = known_data[..., misalign:]
    idata = idata[..., misalign:]
    oshape = list(shape)
    oshape[axes[-1]] = shape[axes[-1]] // 2 + 1
    odata = bf.ndarray(shape=oshape, dtype='cf32', space='cuda_managed')
    fft = Fft()
    fft.init(idata, odata, axes=axes)
    fft.execute(idata, odata)
    stream_synchronize()
    known_result = gold_rfftn(known_data.astype(np.float32) / scale, axes=axes)
    compare(odata, known_result)
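# A minimal NumPy-only sketch of the misalignment trick used above (nothing
# here beyond plain NumPy): np.resize pads the last axis by `misalign`
# elements, and slicing those elements back off yields a view whose first
# element no longer sits on the allocation boundary. This is presumably what
# exercises the FFT backend's handling of unaligned input pointers.
import numpy as np

shape = (4, 32)
misalign = 3
data = np.random.normal(size=shape).astype(np.float32)
padded = np.resize(data, shape[:-1] + (shape[-1] + misalign,))
view = padded[..., misalign:]  # same shape as `data`, offset start
assert view.shape == data.shape
# The view's data pointer is offset from the base allocation:
offset_bytes = (view.__array_interface__['data'][0]
                - padded.__array_interface__['data'][0])
assert offset_bytes == misalign * view.itemsize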
def run_unpack_to_ci8_test(self, iarray):
    oarray = bf.ndarray(shape=iarray.shape, dtype='ci8', space='cuda_managed')
    oarray_known = bf.ndarray([[(0, 1), (2, 3)],
                               [(4, 5), (6, 7)],
                               [(-8, -7), (-6, -5)]],
                              dtype='ci8')
    bf.unpack(iarray.copy(space='cuda_managed'), oarray)
    stream_synchronize()
    np.testing.assert_equal(oarray, oarray_known)
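# A hedged sketch (plain Python, not the Bifrost API) of what the packed
# input plausibly looks like: the expected output above spans exactly the
# signed 4-bit range [-8, 7], consistent with a ci4 -> ci8 unpack, where a
# 'ci4' element packs a signed 4-bit real and imaginary part into one byte.
# The nibble order chosen here is an assumption for illustration only.
def pack_ci4(re, im):
    """Pack signed 4-bit (re, im) into one byte, real in the high nibble."""
    return ((re & 0xF) << 4) | (im & 0xF)

def unpack_ci4(byte):
    """Recover signed 4-bit (re, im) from one packed byte."""
    re = (byte >> 4) & 0xF
    im = byte & 0xF
    # Sign-extend the 4-bit two's-complement values
    re = re - 16 if re >= 8 else re
    im = im - 16 if im >= 8 else im
    return re, im

assert unpack_ci4(pack_ci4(-8, 7)) == (-8, 7)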
def run_reduce_test(self, shape, axis, n, op='sum', dtype=np.float32):
    a = ((np.random.random(size=shape) * 2 - 1) * 127).astype(np.int8).astype(dtype)
    if op[:3] == 'pwr':
        b_gold = pwrscrunch(a.astype(np.float32), n, axis, NP_OPS[op[3:]])
    else:
        b_gold = scrunch(a.astype(np.float32), n, axis, NP_OPS[op])
    a = bf.asarray(a, space='cuda_managed')
    b = bf.empty_like(b_gold, space='cuda_managed')
    bf.reduce(a, b, op)
    stream_synchronize()
    np.testing.assert_allclose(b, b_gold)
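# The gold values above come from scrunch/pwrscrunch reference helpers
# defined elsewhere in the test suite. A minimal NumPy sketch of what such
# helpers plausibly compute (an assumption, not the verbatim definitions):
# reshape the reduced axis into (len/n, n) blocks and apply the NumPy op
# (e.g. np.sum, np.mean, np.max) over each block; the 'pwr' variants first
# detect the signal, reducing |x|^2 instead of x.
import numpy as np

def scrunch(x, n, axis, np_op):
    """Reduce `x` by a factor `n` along `axis` using `np_op` (e.g. np.sum)."""
    x = np.moveaxis(x, axis, -1)
    blocked = x.reshape(x.shape[:-1] + (x.shape[-1] // n, n))
    out = np_op(blocked, axis=-1)
    return np.moveaxis(out, -1, axis)

def pwrscrunch(x, n, axis, np_op):
    """Like scrunch, but reduce the detected power |x|^2 instead of x."""
    return scrunch(np.abs(x)**2, n, axis, np_op)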
def copy_array(dst, src):
    dst_bf = asarray(dst)
    src_bf = asarray(src)
    if (space_accessible(dst_bf.bf.space, ['system']) and
            space_accessible(src_bf.bf.space, ['system'])):
        np.copyto(dst_bf, src_bf)
    else:
        _check(_bf.bfArrayCopy(dst_bf.as_BFarray(), src_bf.as_BFarray()))
        if dst_bf.bf.space != src_bf.bf.space:
            # TODO: Decide where/when these need to be called
            device.stream_synchronize()
    return dst
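# A hedged usage sketch: copy_array dispatches to np.copyto when both arrays
# are system-accessible, and to bfArrayCopy (plus a synchronize for
# cross-space copies) otherwise. Shapes and dtypes below are illustrative,
# and it is assumed bifrost is importable as bf with copy_array in scope.
import numpy as np
import bifrost as bf

src = np.arange(16, dtype=np.float32).reshape(4, 4)
dst = bf.ndarray(shape=(4, 4), dtype='f32', space='cuda')
copy_array(dst, src)   # host -> device, takes the bfArrayCopy branch
back = np.empty((4, 4), dtype=np.float32)
copy_array(back, dst)  # device -> host
np.testing.assert_equal(back, src)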
def run_simple_test(self, x, funcstr, func):
    x_orig = x
    x = bf.asarray(x, 'cuda_managed')
    y = bf.empty_like(x)
    x.flags['WRITEABLE'] = False
    x.bf.immutable = True # TODO: Is this actually doing anything? (flags is, just not sure about bf.immutable)
    for _ in range(3):
        bf.map(funcstr, {'x': x, 'y': y})
    stream_synchronize()
    if isinstance(x_orig, bf.ndarray):
        x_orig = x
    # Note: Using func(x) is dangerous because bf.ndarray does things like
    #       lazy .conj(), which break when used as if it were np.ndarray.
    np.testing.assert_equal(y, func(x_orig))
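# Hypothetical invocation from within the test class (the test name and
# expression are assumptions for illustration): the string is compiled by
# bf.map into an elementwise kernel over the named arrays, and the lambda is
# the NumPy reference it is checked against.
def test_scale_and_offset(self):
    x = np.arange(256, dtype=np.float32)
    self.run_simple_test(x, "y = x * 2 + 1", lambda x: x * 2 + 1)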
def copy(self, space=None, order='C'):
    if order != 'C':
        raise NotImplementedError('Only order="C" is supported')
    if space is None:
        space = self.bf.space
    if not self.flags['C_CONTIGUOUS']:
        # Deal with arrays that need to have their layouts changed
        # TODO: Is there a better way to handle this?
        if space_accessible(self.bf.space, ['system']):
            ## For arrays that can be accessed from the system space, use
            ## numpy.ndarray.copy() to do the heavy lifting
            if space == 'cuda_managed':
                ## TODO: Decide where/when these need to be called
                device.stream_synchronize()
            ## This actually makes two copies and throws one away
            temp = ndarray(shape=self.shape, dtype=self.dtype, space=self.bf.space)
            temp[...] = np.array(self).copy()
            if self.bf.space != space:
                return ndarray(temp, space=space)
            return temp
        else:
            ## For arrays that can be accessed from CUDA, use bifrost.transpose
            ## to do the heavy lifting
            ### Figure out the correct axis order for C
            permute = np.argsort(self.strides)[::-1]
            c_shape = [self.shape[p] for p in permute]
            ### Make a BFarray wrapper for self so we can reset shape/strides
            ### to what they should be for a C ordered array
            self_corder = self.as_BFarray()
            shape_type = ctypes.c_long * _bf.BF_MAX_DIMS
            self_corder.shape = shape_type(*c_shape)
            self_corder.strides = shape_type(*[self.strides[p] for p in permute])
            ### Make a temporary array with the right shape that will be C ordered
            temp = ndarray(shape=self.shape, dtype=self.dtype, space=self.bf.space)
            ### Run the transpose using the BFarray wrapper and the temporary array
            array_type = ctypes.c_int * self.ndim
            axes_array = array_type(*permute)
            _check(_bf.bfTranspose(self_corder, temp.as_BFarray(), axes_array))
            if self.bf.space != space:
                return ndarray(temp, space=space)
            return temp
    # Note: This makes an actual copy as long as space is not None
    return ndarray(self, space=space)
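# A hedged usage sketch: copying a transposed (hence non-C-contiguous) view
# forces the bfTranspose path above, producing a C-contiguous result,
# optionally in a different memory space. This assumes transpose views on
# bf.ndarray behave as in NumPy (bf.ndarray subclasses np.ndarray).
import numpy as np
import bifrost as bf

a = bf.ndarray(np.arange(12, dtype=np.float32).reshape(3, 4), space='cuda')
at = a.transpose(1, 0)       # non-contiguous view, shape (4, 3)
b = at.copy(space='system')  # C-contiguous copy on the host
np.testing.assert_equal(np.array(b),
                        np.arange(12, dtype=np.float32).reshape(3, 4).T)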
def main(self, orings):
    for sourcename in self.sourcenames:
        if self.shutdown_event.is_set():
            break
        with self.create_reader(sourcename) as ireader:
            oheaders = self.on_sequence(ireader, sourcename)
            for ohdr in oheaders:
                if 'time_tag' not in ohdr:
                    ohdr['time_tag'] = self._seq_count
                if 'name' not in ohdr:
                    ohdr['name'] = 'unnamed-sequence-%i' % self._seq_count
            self._seq_count += 1
            with ExitStack() as oseq_stack:
                oseqs, ogulp_overlaps = self.begin_sequences(oseq_stack,
                                                             orings,
                                                             oheaders,
                                                             igulp_nframes=[],
                                                             istride_nframes=[])
                while not self.shutdown_event.is_set():
                    prev_time = time.time()
                    with ExitStack() as ospan_stack:
                        ospans = self.reserve_spans(ospan_stack, oseqs)
                        cur_time = time.time()
                        reserve_time = cur_time - prev_time
                        prev_time = cur_time
                        ostrides_actual = self.on_data(ireader, ospans)
                        device.stream_synchronize()
                        self.commit_spans(ospans, ostrides_actual, ogulp_overlaps)
                        # TODO: Is this an OK way to detect end-of-data?
                        if any([ostride == 0 for ostride in ostrides_actual]):
                            break
                    cur_time = time.time()
                    process_time = cur_time - prev_time
                    prev_time = cur_time
                    self.perf_proclog.update({'acquire_time': -1,
                                              'reserve_time': reserve_time,
                                              'process_time': process_time})
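# A hedged sketch of the contract that main() drives above, as a minimal
# subclass. The method names (create_reader, on_sequence, on_data) come from
# the code above; the '_tensor' header layout and the span's .data attribute
# are assumptions based on typical Bifrost source blocks, not a verbatim API.
class BinaryFileSource(SourceBlock):
    def create_reader(self, sourcename):
        # Must return a context manager (main() uses it in a `with`)
        return open(sourcename, 'rb')
    def on_sequence(self, ireader, sourcename):
        # One output header per output ring; the frame axis is the -1 one
        return [{'_tensor': {'dtype':  'f32',
                             'shape':  [-1, 1024],
                             'labels': ['time', 'chan']}}]
    def on_data(self, ireader, ospans):
        odata = ospans[0].data  # assumed shape (nframe_max, 1024), float32
        nbyte = ireader.readinto(odata)
        # Returning 0 frames triggers the end-of-data break in main()
        return [nbyte // odata.strides[0]]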
def main(self, orings):
    for iseqs in izip(*[iring.read(guarantee=self.guarantee)
                        for iring in self.irings]):
        if self.shutdown_event.is_set():
            break
        for i, iseq in enumerate(iseqs):
            self.sequence_proclogs[i].update(iseq.header)
        oheaders = self._on_sequence(iseqs)
        for ohdr in oheaders:
            if 'time_tag' not in ohdr:
                ohdr['time_tag'] = self._seq_count
        self._seq_count += 1
        igulp_nframes = [self.gulp_nframe or iseq.header['gulp_nframe']
                         for iseq in iseqs]
        igulp_overlaps = self._define_input_overlap_nframe(iseqs)
        istride_nframes = igulp_nframes[:]
        igulp_nframes = [igulp_nframe + nframe_overlap
                         for igulp_nframe, nframe_overlap
                         in zip(igulp_nframes, igulp_overlaps)]
        for iseq, igulp_nframe in zip(iseqs, igulp_nframes):
            if self.buffer_factor is None:
                src_block = iseq.ring.owner
                if src_block is not None and self.is_fused_with(src_block):
                    buffer_factor = 1
                else:
                    buffer_factor = None
            else:
                buffer_factor = self.buffer_factor
            iseq.resize(gulp_nframe=igulp_nframe,
                        buf_nframe=self.buffer_nframe,
                        buffer_factor=buffer_factor)
        # TODO: Ever need to specify starting offset?
        iframe0s = [0 for _ in igulp_nframes]
        force_skip = False
        with ExitStack() as oseq_stack:
            oseqs, ogulp_overlaps = self.begin_sequences(oseq_stack,
                                                         orings,
                                                         oheaders,
                                                         igulp_nframes,
                                                         istride_nframes)
            if self.shutdown_event.is_set():
                break
            prev_time = time.time()
            for ispans in izip(*[iseq.read(igulp_nframe, istride_nframe, iframe0)
                                 for (iseq, igulp_nframe, istride_nframe, iframe0)
                                 in zip(iseqs, igulp_nframes,
                                        istride_nframes, iframe0s)]):
                if self.shutdown_event.is_set():
                    return
                if any([ispan.nframe_skipped for ispan in ispans]):
                    # There were skipped (overwritten) frames
                    with ExitStack() as ospan_stack:
                        iskip_slices = [slice(iframe0,
                                              iframe0 + ispan.nframe_skipped,
                                              istride_nframe)
                                        for iframe0, istride_nframe, ispan
                                        in zip(iframe0s, istride_nframes, ispans)]
                        iskip_nframes = [ispan.nframe_skipped
                                         for ispan in ispans]
                        # ***TODO: Need to loop over multiple ospans here,
                        #          because iskip_nframes can be
                        #          arbitrarily large!
                        ospans = self.reserve_spans(ospan_stack, oseqs,
                                                    iskip_nframes)
                        ostrides_actual = self._on_skip(iskip_slices, ospans)
                        device.stream_synchronize()
                        self.commit_spans(ospans, ostrides_actual,
                                          ogulp_overlaps)
                if all([ispan.nframe == 0 for ispan in ispans]):
                    # No data to see here, move right along
                    continue
                cur_time = time.time()
                acquire_time = cur_time - prev_time
                prev_time = cur_time
                with ExitStack() as ospan_stack:
                    igulp_nframes = [ispan.nframe for ispan in ispans]
                    ospans = self.reserve_spans(ospan_stack, oseqs,
                                                igulp_nframes)
                    cur_time = time.time()
                    reserve_time = cur_time - prev_time
                    prev_time = cur_time
                    if not force_skip:
                        # *TODO: See if can fuse together multiple on_data calls here before
                        #        calling stream_synchronize().
                        # Consider passing .data instead of rings here
                        ostrides_actual = self._on_data(ispans, ospans)
                        device.stream_synchronize()
                    any_frames_overwritten = any([ispan.nframe_overwritten
                                                  for ispan in ispans])
                    if force_skip or any_frames_overwritten:
                        # Note: To allow interrupted pipelines to catch up,
                        #       we force-skip an additional gulp whenever
                        #       a span is overwritten during on_data.
                        force_skip = any_frames_overwritten
                        iskip_slices = [slice(ispan.frame_offset,
                                              ispan.frame_offset + ispan.nframe_overwritten,
                                              istride_nframe)
                                        for ispan, istride_nframe
                                        in zip(ispans, istride_nframes)]
                        ostrides_actual = self._on_skip(iskip_slices, ospans)
                        device.stream_synchronize()
                    self.commit_spans(ospans, ostrides_actual, ogulp_overlaps)
                cur_time = time.time()
                process_time = cur_time - prev_time
                prev_time = cur_time
                self.perf_proclog.update({'acquire_time': acquire_time,
                                          'reserve_time': reserve_time,
                                          'process_time': process_time})
            # **TODO: This will not be called if an exception is raised
            #         Need to call it from a context manager somehow
            self._on_sequence_end(iseqs)
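# Design note on the loop above: the three perf_proclog fields partition each
# iteration of the span loop. acquire_time measures waiting on input spans,
# reserve_time measures waiting for output ring space, and process_time
# covers on_data/on_skip plus the stream synchronize. The force_skip flag is
# the catch-up mechanism described in the Note above: if input frames were
# overwritten while on_data ran, the next gulp is skipped outright (via
# _on_skip) rather than processed late.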