def test_fdmt(self):
    """Check Fdmt.execute against Fdmt.execute_workspace on random input.

    A (128, 1024) float32 array of normal noise is transformed twice into
    -999-filled (200, 1024) outputs: once via ``execute`` and once via
    ``execute_workspace`` with an explicitly allocated workspace. The two
    results must be identical.
    """
    ntime, nchan, max_delay = 1024, 128, 200
    start_freq = 1000.
    bandwidth = 400.
    chan_width = bandwidth / nchan
    power = -2.0
    out_shape = (max_delay, ntime)

    plan = Fdmt()
    plan.init(nchan, max_delay, start_freq, chan_width, power, 'cuda')

    noise = np.random.normal(size=(nchan, ntime)).astype(np.float32)
    idata = bf.asarray(noise, space='cuda')

    # First pass: let the plan manage its own temporary storage.
    direct = bf.asarray(np.full(out_shape, -999, dtype=np.float32),
                        space='cuda')
    plan.execute(idata, direct)
    direct = direct.copy('system')
    # The output minimum is expected to still be the -999 fill value.
    self.assertEqual(direct.min(), -999)
    # TODO: Need better tests
    self.assertLess(direct.max(), 100.)

    # Second pass: same transform with a caller-provided workspace.
    via_workspace = bf.asarray(np.full(out_shape, -999, dtype=np.float32),
                               space='cuda')
    nbyte = plan.get_workspace_size(idata, via_workspace)
    self.assertEqual(nbyte, 3293184)
    scratch = bf.asarray(np.empty(nbyte, np.uint8), space='cuda')
    plan.execute_workspace(idata, via_workspace, scratch.ctypes.data, nbyte)
    via_workspace = via_workspace.copy('system')

    np.testing.assert_equal(direct, via_workspace)
def run_test(self, ntime, nchan, max_delay, batch_shape=()):
    """Compare Fdmt.execute with Fdmt.execute_workspace for one geometry.

    Parameters
    ----------
    ntime, nchan : int
        Sizes of the last two input axes (input shape is
        ``batch_shape + (nchan, ntime)``).
    max_delay : int
        Size of the second-to-last output axis (output shape is
        ``batch_shape + (max_delay, ntime)``); also passed to ``Fdmt.init``.
    batch_shape : tuple
        Leading axes prepended to both input and output shapes.
    """
    start_freq = 1000.
    bandwidth = 400.
    chan_width = bandwidth / nchan
    power = -2.0
    in_shape = batch_shape + (nchan, ntime)
    out_shape = batch_shape + (max_delay, ntime)

    plan = Fdmt()
    plan.init(nchan, max_delay, start_freq, chan_width, power, 'cuda')

    noise = np.random.normal(size=in_shape).astype(np.float32)
    idata = bf.asarray(noise, space='cuda')

    # First pass: let the plan manage its own temporary storage.
    direct = bf.asarray(np.full(out_shape, -999, dtype=np.float32),
                        space='cuda')
    plan.execute(idata, direct)
    direct = direct.copy('system')
    if max_delay > 1:
        # The output minimum is expected to still be the -999 fill value.
        self.assertEqual(direct.min(), -999)
    # TODO: Need better tests
    self.assertLess(direct.max(), 100.)

    # Second pass: same transform with a caller-provided workspace.
    via_workspace = bf.asarray(np.full(out_shape, -999, dtype=np.float32),
                               space='cuda')
    nbyte = plan.get_workspace_size(idata, via_workspace)
    scratch = bf.asarray(np.empty(nbyte, np.uint8), space='cuda')
    plan.execute_workspace(idata, via_workspace, scratch.ctypes.data, nbyte)
    via_workspace = via_workspace.copy('system')

    np.testing.assert_equal(direct, via_workspace)
class FdmtBlock(TransformBlock):
    """Transform block that runs Fdmt over [..., 'freq', 'time'] data.

    The 'freq' axis of the input is replaced by a 'dispersion' axis of
    length ``max_delay`` in the output header.

    Exactly one of ``max_dm``, ``max_delay`` or ``max_diagonal`` must be
    given:

    * ``max_dm``: largest dispersion measure, in ``pc cm^-3``; the delay
      count is derived from it in ``on_sequence``.
    * ``max_delay``: used directly as the delay count.
    * ``max_diagonal``: multiplied by the number of channels (and rounded
      up) to obtain the delay count.

    ``exponent`` is forwarded to ``Fdmt.init``; ``negative_delays`` negates
    the reported maximum DM and is forwarded to ``Fdmt.execute_workspace``.

    Raises
    ------
    ValueError
        If zero or more than one of the three limits is specified.
    """

    def __init__(self, iring, max_dm=None, max_delay=None, max_diagonal=None,
                 exponent=-2.0, negative_delays=False, *args, **kwargs):
        super(FdmtBlock, self).__init__(iring, *args, **kwargs)
        limits = (max_dm, max_delay, max_diagonal)
        if sum(limit is not None for limit in limits) != 1:
            raise ValueError("Must specify exactly one of: max_dm, max_delay, "
                             "max_diagonal")
        self.space = self.orings[0].space
        self.max_value = max_dm or max_delay or max_diagonal or 0.
        self.max_mode = ('dm' if max_dm is not None else
                         'delay' if max_delay is not None else
                         'diagonal' if max_diagonal is not None else
                         'error')
        self.kdm = 4.148741601e3  # MHz**2 cm**3 s / pc
        self.dm_units = 'pc cm^-3'
        self.exponent = exponent
        self.negative_delays = negative_delays
        self.fdmt = Fdmt()

    def define_valid_input_spaces(self):
        """Return the memory spaces accepted for each input ('cuda' only)."""
        return ('cuda', )

    def on_sequence(self, iseq):
        """Initialise the FDMT plan for a new sequence and build its header.

        Sets ``self.max_delay`` and ``self.dm_step`` and returns the output
        header.

        Raises
        ------
        KeyError
            If the last two axis labels are not 'freq', 'time'.
        ValueError
            If the configured max mode is not recognised.
        """
        ihdr = iseq.header
        itensor = ihdr['_tensor']
        labels = itensor['labels']
        if labels[-1] != 'time' or labels[-2] != 'freq':
            raise KeyError("Expected axes [..., 'freq', 'time'], got %s"
                           % labels)
        nchan = itensor['shape'][-2]
        f0_, df_ = itensor['scales'][-2]
        _t0, dt_ = itensor['scales'][-1]
        # Units must match self.kdm
        f0 = convert_units(f0_, itensor['units'][-2], 'MHz')
        df = convert_units(df_, itensor['units'][-2], 'MHz')
        dt = convert_units(dt_, itensor['units'][-1], 's')

        # Resolve the limit into locals. Overwriting self.max_mode /
        # self.max_value here would make a 'diagonal' limit stick to the
        # first sequence's channel count for every later sequence.
        max_mode = self.max_mode
        max_value = self.max_value
        if max_mode == 'diagonal':
            max_value = int(math.ceil(nchan * max_value))
            max_mode = 'delay'

        # Difference of f**-2 between the two band edges.
        fac = f0**-2 - (f0 + nchan * df)**-2
        if max_mode == 'dm':
            max_dm = max_value
            rel_delay = self.kdm / dt * max_dm * fac
            self.max_delay = int(math.ceil(abs(rel_delay)))
        elif max_mode == 'delay':
            self.max_delay = max_value
            max_dm = self.max_delay * dt / (self.kdm * abs(fac))
        else:
            raise ValueError("Unknown max mode: %s" % max_mode)
        if self.negative_delays:
            max_dm = -max_dm
        self.dm_step = max_dm / self.max_delay
        self.fdmt.init(nchan, self.max_delay, f0, df, self.exponent,
                       self.space)

        ohdr = deepcopy(ihdr)
        if 'refdm' in ihdr:
            refdm = convert_units(ihdr['refdm'],
                                  ihdr['refdm_units'],
                                  self.dm_units)
        else:
            refdm = 0.
        # Update transformed axis info
        otensor = ohdr['_tensor']
        otensor['dtype'] = 'f32'
        otensor['shape'][-2] = self.max_delay
        otensor['labels'][-2] = 'dispersion'
        otensor['scales'][-2] = (refdm, self.dm_step)
        otensor['units'][-2] = self.dm_units
        # Add some new metadata
        ohdr['max_dm'] = max_dm
        ohdr['max_dm_units'] = self.dm_units
        ohdr['cfreq'] = f0_ + 0.5 * (nchan - 1) * df_
        ohdr['cfreq_units'] = itensor['units'][-2]
        ohdr['bw'] = nchan * df_
        ohdr['bw_units'] = itensor['units'][-2]
        return ohdr

    def define_input_overlap_nframe(self, iseq):
        """Return no. input frames that should overlap between successive
        spans.
        """
        return self.max_delay

    def on_data(self, ispan, ospan):
        """Run the FDMT on one input span, writing into the output span.

        Returns 0 when the span has no more than ``max_delay`` frames;
        otherwise falls through and returns None.
        NOTE(review): presumably the base class derives the output frame
        count from the declared overlap in that case - confirm.
        """
        if ispan.nframe <= self.max_delay:
            # Cannot fully process any frames
            return 0

        size = self.fdmt.get_workspace_size(ispan.data, ospan.data)
        with self.get_temp_storage(self.space).allocate(size) as temp_storage:
            self.fdmt.execute_workspace(ispan.data, ospan.data,
                                        temp_storage.ptr, temp_storage.size,
                                        negative_delays=self.negative_delays)
class FdmtBlock(TransformBlock):
    """Transform block that runs Fdmt up to a maximum dispersion measure.

    The second-to-last input axis is replaced in the output header by a
    'dispersion' axis of length ``max_delay``, which is derived from
    ``max_dm`` (in ``pc cm^-3``) in ``on_sequence``.

    ``exponent`` is forwarded to ``Fdmt.init``; ``negative_delays`` flips
    the sign of the DM step and is forwarded to
    ``Fdmt.execute_workspace``.
    """

    def __init__(self, iring, max_dm, exponent=-2.0, negative_delays=False,
                 *args, **kwargs):
        super(FdmtBlock, self).__init__(iring, *args, **kwargs)
        self.space = self.orings[0].space
        self.max_dm = max_dm
        self.kdm = 4.148741601e3  # MHz**2 cm**3 s / pc
        self.dm_units = 'pc cm^-3'
        self.exponent = exponent
        self.negative_delays = negative_delays
        self.fdmt = Fdmt()

    def define_valid_input_spaces(self):
        """Return set of valid spaces (or 'any') for each input"""
        return ('cuda', )

    def on_sequence(self, iseq):
        """Initialise the FDMT plan for a new sequence and build its header.

        Sets ``self.max_delay`` and ``self.dm_step``.

        Returns
        -------
        tuple
            ``(ohdr, slice)`` where the slice is
            ``slice(0, gulp_nframe + max_delay, gulp_nframe)``.
        """
        ihdr = iseq.header
        itensor = ihdr['_tensor']
        # TODO: Assert that axis labels match expected (and/or allow more
        #         flexibility in which axes are used)
        nchan = itensor['shape'][-2]
        f0_, df_ = itensor['scales'][-2]
        _t0, dt_ = itensor['scales'][-1]
        # Units must match self.kdm
        f0 = convert_units(f0_, itensor['units'][-2], 'MHz')
        df = convert_units(df_, itensor['units'][-2], 'MHz')
        dt = convert_units(dt_, itensor['units'][-1], 's')
        rel_delay = (self.kdm / dt * self.max_dm *
                     (f0**-2 - (f0 + nchan * df)**-2))
        self.max_delay = int(math.ceil(abs(rel_delay)))
        self.dm_step = self.max_dm / self.max_delay
        if self.negative_delays:
            self.dm_step *= -1
        self.fdmt.init(nchan, self.max_delay, f0, df, self.exponent,
                       self.space)

        ohdr = deepcopy(ihdr)
        if 'refdm' in ihdr:
            refdm = convert_units(ihdr['refdm'],
                                  ihdr['refdm_units'],
                                  self.dm_units)
        else:
            refdm = 0.
        # Update transformed axis info
        otensor = ohdr['_tensor']
        otensor['dtype'] = 'f32'
        otensor['shape'][-2] = self.max_delay
        otensor['labels'][-2] = 'dispersion'
        otensor['scales'][-2] = (refdm, self.dm_step)
        otensor['units'][-2] = self.dm_units
        # Add some new metadata
        ohdr['max_dm'] = self.max_dm
        ohdr['max_dm_units'] = self.dm_units
        # Centre of the band: first channel plus half the span to the last
        # channel. (Halving f0_ as well would give roughly half the true
        # centre frequency.)
        ohdr['cfreq'] = f0_ + 0.5 * (nchan - 1) * df_
        ohdr['cfreq_units'] = itensor['units'][-2]
        ohdr['bw'] = nchan * df_
        ohdr['bw_units'] = itensor['units'][-2]
        gulp_nframe = self.gulp_nframe or ihdr['gulp_nframe']
        return ohdr, slice(0, gulp_nframe + self.max_delay, gulp_nframe)

    def on_data(self, ispan, ospan):
        """Run the FDMT on one input span, writing into the output span.

        Returns 0 when the span has no more than ``max_delay`` frames,
        otherwise ``ispan.nframe - max_delay``.
        """
        if ispan.nframe <= self.max_delay:
            # Cannot fully process any frames
            return 0

        size = self.fdmt.get_workspace_size(ispan.data, ospan.data)
        with self.get_temp_storage(self.space).allocate(size) as temp_storage:
            self.fdmt.execute_workspace(ispan.data, ospan.data,
                                        temp_storage.ptr, temp_storage.size,
                                        negative_delays=self.negative_delays)
        return ispan.nframe - self.max_delay