def fold(fh, comm, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, rfi_filter_raw=None, rfi_filter_power=None,
         return_fits=False):
    """
    FFT data, fold by phase/time and make a waterfall series

    Folding is done from the position the file is currently in

    Parameters
    ----------
    fh : file handle
        handle to file holding voltage timeseries.  The code uses its
        ``nchan``, ``blocksize``, ``telescope``, ``tell()`` and
        ``seek_record_read()`` members, and optionally ``npol``,
        ``data_is_complex``, ``dtsample`` and ``frequencies``.
    comm : MPI communicator or None
        will use size, rank attributes
    samplerate : Quantity
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top : bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref : float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of the file that is read.
    dedisperse : None or string (default: incoherent).
        None, 'incoherent', 'coherent', 'by-channel'.
        Note: None really does nothing
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or int
        whether to give some progress information (default: True);
        values >= 2 print per-record diagnostics
    progress_interval : int
        Ping every progress_interval sets
    rfi_filter_raw : callable or None
        if given, called as ``raw, ok = rfi_filter_raw(raw)`` on each record
    rfi_filter_power : callable or None
        if given, called as ``power = rfi_filter_power(power)``
    return_fits : bool (default: False)
        return a subint fits table for rank == 0 (None otherwise).
        Not used anywhere in this function body.

    Returns
    -------
    foldspec : array or None
        folded spectrum, shape (ntbin, nchan, ngate, npol**2); the last
        axis is dropped for a single polarisation.  None if not requested.
    icount : array or None
        number of non-zero samples per (ntbin, nchan, ngate) entry.
    waterfall : array or None
        summed power, shape (nwsize, nchan, npol**2), last axis dropped for
        a single polarisation.  None if not requested.

    Raises
    ------
    ValueError
        if coherent dedispersion is requested on channelized data.
    """
    assert dedisperse in (None, 'incoherent', 'by-channel', 'coherent')
    assert nchan % fh.nchan == 0
    if dedisperse == 'by-channel':
        oversample = nchan // fh.nchan
        assert ntint % oversample == 0
    else:
        oversample = 1

    if dedisperse == 'coherent' and fh.nchan > 1:
        raise ValueError("For coherent dedispersion, data must be "
                         "unchannelized before folding.")

    if comm is None:
        mpi_rank = 0
        mpi_size = 1
    else:
        mpi_rank = comm.rank
        mpi_size = comm.size

    npol = getattr(fh, 'npol', 1)
    assert npol == 1 or npol == 2
    if verbose > 1 and mpi_rank == 0:
        print("Number of polarisations={}".format(npol))

    # initialize folded spectrum and waterfall
    # TODO: use estimated number of points to set dtype
    if do_foldspec:
        foldspec = np.zeros((ntbin, nchan, ngate, npol**2), dtype=np.float32)
        icount = np.zeros((ntbin, nchan, ngate), dtype=np.int32)
    else:
        foldspec = None
        icount = None

    if do_waterfall:
        nwsize = nt * ntint // ntw
        waterfall = np.zeros((nwsize, nchan, npol**2), dtype=np.float64)
    else:
        waterfall = None

    if verbose and mpi_rank == 0:
        print('Reading from {}'.format(fh))

    # number of blocks the file pointer is already past the start
    nskip = fh.tell() / fh.blocksize
    if nskip > 0:
        if verbose and mpi_rank == 0:
            print('Starting {0} blocks = {1} bytes out from start.'.format(
                nskip, nskip * fh.blocksize))

    dt1 = (1. / samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    if fh.telescope == 'lofar':
        dtsample = fh.dtsample
    else:
        dtsample = nchan // oversample * 2 * dt1
    tstart = dtsample * ntint * nskip

    # pre-calculate time delay due to dispersion in coarse channels
    # for channelized data, frequencies are known
    if fh.nchan == 1:
        if getattr(fh, 'data_is_complex', False):
            # for complex data, really each complex sample consists of
            # 2 real ones, so multiply dt1 by 2.
            if fedge_at_top:
                freq = fedge - fftfreq(nchan, 2. * dt1.value) * u.Hz
            else:
                freq = fedge + fftfreq(nchan, 2. * dt1.value) * u.Hz
        else:
            if fedge_at_top:
                freq = fedge - rfftfreq(nchan * 2, dt1.value)[::2] * u.Hz
            else:
                freq = fedge + rfftfreq(nchan * 2, dt1.value)[::2] * u.Hz
        freq_in = freq
    else:
        # input frequencies may not be the ones going out
        freq_in = fh.frequencies
        if oversample == 1:
            freq = freq_in
        else:
            if fedge_at_top:
                freq = (freq_in[:, np.newaxis] -
                        u.Hz * fftfreq(oversample, dtsample.value))
            else:
                freq = (freq_in[:, np.newaxis] +
                        u.Hz * fftfreq(oversample, dtsample.value))

    # output channels are stored sorted by frequency
    ifreq = freq.ravel().argsort()

    # pre-calculate time offsets in (input) channelized streams
    dt = dispersion_delay_constant * dm * (1. / freq_in**2 - 1. / fref**2)

    if dedisperse in ['coherent', 'by-channel']:
        # pre-calculate required turns due to dispersion
        if fedge_at_top:
            fcoh = (freq_in[np.newaxis, :] -
                    u.Hz * fftfreq(ntint, dtsample.value)[:, np.newaxis])
        else:
            fcoh = (freq_in[np.newaxis, :] +
                    u.Hz * fftfreq(ntint, dtsample.value)[:, np.newaxis])

        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            _fref = freq_in[np.newaxis, :]
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy
        dang = (dispersion_delay_constant * dm * fcoh *
                (1. / _fref - 1. / fcoh)**2) * u.cycle
        with u.set_enabled_equivalencies(u.dimensionless_angles()):
            dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)
        # add dimension for polarisation
        dd_coh = dd_coh[..., np.newaxis]

    # Calculate the part of the whole file this node should handle.
    size_per_node = (nt - 1) // mpi_size + 1
    start_block = mpi_rank * size_per_node
    end_block = min((mpi_rank + 1) * size_per_node, nt)
    for j in range(start_block, end_block):
        if verbose and j % progress_interval == 0:
            print('#{:4d}/{:4d} is doing {:6d}/{:6d} [={:6d}/{:6d}]; '
                  'time={:18.12f}'.format(
                      mpi_rank, mpi_size, j + 1, nt,
                      j - start_block + 1, end_block - start_block,
                      (tstart + dtsample * j * ntint).value))  # time since start

        # Just in case numbers were set wrong -- break if file ends;
        # better keep at least the work done.
        try:
            raw = fh.seek_record_read(int((nskip + j) * fh.blocksize),
                                      fh.blocksize)
        except (EOFError, IOError) as exc:
            print("Hit {0!r}; writing data collected.".format(exc))
            break
        if verbose >= 2:
            print("#{:4d}/{:4d} read {} items".format(mpi_rank, mpi_size,
                                                      raw.size), end="")

        if npol == 2:  # multiple polarisations
            # view the structured record as its first field's base dtype;
            # dict views cannot be indexed on Python 3, hence next(iter()).
            raw = raw.view(next(iter(raw.dtype.fields.values()))[0])

        if fh.nchan == 1:  # raw.shape=(ntint*npol)
            raw = raw.reshape(-1, npol)
        else:  # raw.shape=(ntint, nchan*npol)
            raw = raw.reshape(-1, fh.nchan, npol)

        if rfi_filter_raw is not None:
            raw, ok = rfi_filter_raw(raw)
            if verbose >= 2:
                print("... raw RFI (zap {0}/{1})".format(
                    np.count_nonzero(~ok), ok.size), end="")

        if np.can_cast(raw.dtype, np.float32):
            vals = raw.astype(np.float32)
        else:
            assert raw.dtype.kind == 'c'
            vals = raw

        if fh.nchan == 1:
            # have real-valued time stream of complex baseband
            # if we need some coherentdedispersion, do FT of whole thing,
            # otherwise to output channels
            if raw.dtype.kind == 'c':
                ftchan = nchan if dedisperse == 'incoherent' else len(vals)
                vals = fft(vals.reshape(-1, ftchan, npol), axis=1,
                           overwrite_x=True, **_fftargs)
            else:  # real data
                ftchan = (nchan if dedisperse == 'incoherent'
                          else len(vals) // 2)
                vals = rfft(vals.reshape(-1, ftchan * 2, npol), axis=1,
                            overwrite_x=True, **_fftargs)
                # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
                # re-order to normal fft format (like Numerical Recipes):
                # Re[0], Re[n], Re[1], Im[1], .... (channel 0 is junk anyway)
                # NOTE(review): vals[:, 0] and vals[:, -1] drop the channel
                # axis while vals[:, 1:-1] keeps it -- confirm this packing
                # works for the array shapes actually used.
                vals = np.hstack(
                    (vals[:, 0], vals[:, -1], vals[:, 1:-1])
                ).view(np.complex64)
            # for incoherent, vals.shape=(ntint, nchan, npol) -> OK
            # for others, have           (1, ntint*nchan, npol)
            # reshape(nchan, ntint) gives rough as slowly varying -> .T
            if dedisperse != 'incoherent':
                fine = vals.reshape(nchan, -1, npol).transpose(1, 0, 2)
                # now have fine.shape=(ntint, nchan, npol)

        else:  # data already channelized
            if dedisperse == 'by-channel':
                fine = fft(vals, axis=0, overwrite_x=True, **_fftargs)
                # have fine.shape=(ntint, fh.nchan, npol)

        if dedisperse in ['coherent', 'by-channel']:
            fine *= dd_coh
            # rechannelize to output channels
            if oversample > 1 and dedisperse == 'by-channel':
                # fine.shape=(ntint*oversample, chan_in, npol)
                #           =(coarse,fine,fh.chan, npol)
                #  -> reshape(oversample, ntint, fh.nchan, npol)
                # want (ntint=fine, fh.nchan, oversample, npol) -> .transpose
                fine = (fine.reshape(oversample, -1, fh.nchan, npol)
                        .transpose(1, 2, 0, 3)
                        .reshape(-1, nchan, npol))

            # now, for both, fine.shape=(ntint, nchan, npol)
            vals = ifft(fine, axis=0, overwrite_x=True, **_fftargs)

        # vals[time, chan, pol]
        if verbose >= 2:
            print("... dedispersed", end="")

        if npol == 1:
            power = vals.real**2 + vals.imag**2
        else:
            # auto- and cross-products of the two polarisations
            p0 = vals[..., 0]
            p1 = vals[..., 1]
            power = np.empty(vals.shape[:-1] + (4, ), np.float32)
            power[..., 0] = p0.real**2 + p0.imag**2
            power[..., 1] = p0.real * p1.real + p0.imag * p1.imag
            power[..., 2] = p0.imag * p1.real - p0.real * p1.imag
            power[..., 3] = p1.real**2 + p1.imag**2

        if verbose >= 2:
            print("... power", end="")

        if rfi_filter_power is not None:
            power = rfi_filter_power(power)
            # diagnostic only; guarded like the other per-record messages
            if verbose >= 2:
                print("... power RFI", end="")

        # current sample positions in stream
        isr = j * (ntint // oversample) + np.arange(ntint // oversample)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in range(isr[0] // ntw, isr[-1] // ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[iw, :] += np.sum(power[isr // ntw == iw],
                                               axis=0)[ifreq]
            if verbose >= 2:
                print("... waterfall", end="")

        if do_foldspec:
            ibin = (j * ntbin) // nt  # bin in the time series: 0..ntbin-1

            # times since start
            tsample = (tstart + isr * dtsample * oversample)[:, np.newaxis]
            # correct for delay if needed
            if dedisperse in ['incoherent', 'by-channel']:
                # tsample.shape=(ntint/oversample, nchan_in)
                tsample = tsample - dt

            phase = (phasepol(tsample.to(u.s).value.ravel())
                     .reshape(tsample.shape))
            # corresponding PSR phases
            iphase = np.remainder(phase * ngate, ngate).astype(int)

            for k, kfreq in enumerate(ifreq):  # sort in frequency while at it
                iph = iphase[:, (0 if iphase.shape[1] == 1
                                 else kfreq // oversample)]
                # sum and count samples by phase bin
                for ipow in range(npol**2):
                    foldspec[ibin, k, :, ipow] += np.bincount(
                        iph, power[:, kfreq, ipow], ngate)
                # Count only samples with non-zero power.  An unweighted
                # bincount keeps the result integer, so it can be added in
                # place to the int32 counter (a weighted bincount is float).
                icount[ibin, k, :] += np.bincount(
                    iph[power[:, kfreq, 0] != 0.], minlength=ngate)

            if verbose >= 2:
                print("... folded", end="")

        if verbose >= 2:
            print("... done")

    # A final "read N out of M" summary was removed here as a workaround:
    # it caused "Referenced before assignment" errors with JB data.

    if npol == 1:
        # drop the length-1 polarisation axis
        if do_foldspec:
            foldspec = foldspec.reshape(foldspec.shape[:-1])
        if do_waterfall:
            waterfall = waterfall.reshape(waterfall.shape[:-1])

    return foldspec, icount, waterfall
def fold(file1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nhead, ngate, ntbin, ntw, dm, fref, phasepol,
         coherent=False, do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100):
    """FFT ARO data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    file1 : string
        name of the file holding voltage timeseries
    dtype : numpy dtype or '4bit' or '1bit'
        way the data are stored in the file.  The record-size lookup below
        only has entries for ``np.int8`` and ``'4bit'``.
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top : bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nhead : int
        number of bytes to skip before reading (usually 0 for ARO)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref : float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of part of the file that is read (i.e., ignoring nhead)
    coherent : bool
        if True, apply a pre-computed phase factor to the FFT of each
        record before channelizing; otherwise subtract a per-channel
        dispersion delay when folding (default: False)
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets

    Returns
    -------
    foldspec2 : array, shape (nchan, ngate, ntbin)
        normalised folded spectrum with the phase average subtracted
    waterfall : array, shape (nchan, nwsize)
        summed power, with the per-channel mean of the non-zero samples
        subtracted if ``do_waterfall`` is set
    """
    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (simple for ARO: 1 byte/sample)
    recsize = nchan*ntint*{np.int8: 2, '4bit': 1}[dtype]
    if verbose:
        print('Reading from {}'.format(file1))

    nread = 0  # number of records fully processed
    with open(file1, 'rb', recsize) as fh1:
        if nhead > 0:
            if verbose:
                print('Skipping {0} bytes'.format(nhead))
            fh1.seek(nhead)

        # The power sum is float because np.bincount with weights returns
        # floats, which cannot be added in place to an integer array.
        foldspec = np.zeros((nchan, ngate), dtype=float)
        icount = np.zeros((nchan, ngate), dtype=int)

        dt1 = (1./samplerate).to(u.s)
        if coherent:
            # pre-calculate required turns due to dispersion
            fcoh = (fedge - rfftfreq(nchan*ntint, dt1.value) * u.Hz
                    if fedge_at_top
                    else
                    fedge + rfftfreq(nchan*ntint, dt1.value) * u.Hz)
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1./fref-1./fcoh)**2) * 360. * u.deg
            dedisperse = np.exp(dang.to(u.rad).value * 1j
                                ).conj().astype(np.complex64).view(np.float32)
            # get these back into order r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
            dedisperse = np.hstack([dedisperse[:1], dedisperse[2:-1]])
        else:
            # pre-calculate time delay due to dispersion;
            # [::2] sets frequency channels to numerical recipes ordering
            freq = (fedge - rfftfreq(nchan*2, dt1.value)[::2] * u.Hz
                    if fedge_at_top
                    else
                    fedge + rfftfreq(nchan*2, dt1.value)[::2] * u.Hz)

            dt = (dispersion_delay_constant * dm *
                  (1./freq**2 - 1./fref**2)).to(u.s).value

        # need 2*nchan samples for each FFT
        dtsample = (nchan*2/samplerate).to(u.s).value

        for j in range(nt):
            if verbose and (j+1) % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j+1, nt, dtsample*j*ntint))   # equivalent time since start

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data just a series of bytes, each containing one 8 bit or
                # two 4-bit samples (set by dtype in caller)
                raw = fromfile(fh1, dtype, recsize)
            except (EOFError, IOError) as exc:
                print("Hit {}; writing pgm's".format(exc))
                break

            vals = raw.astype(np.float32)
            if coherent:
                fine = rfft(vals, axis=0, overwrite_x=True, **_fftargs)
                # NOTE(review): this multiplies the packed real-FFT output
                # element-wise by a float32 view of the complex phase
                # factors; confirm that this (and the array lengths) give
                # the intended complex rotation.
                fine *= dedisperse
                vals = irfft(fine, axis=0, overwrite_x=True, **_fftargs)

            chan2 = rfft(vals.reshape(-1, nchan*2), axis=-1,
                         overwrite_x=True, **_fftargs)**2
            # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
            # re-order to Num.Rec. format: Re[0], Re[n/2], Re[1], ....
            power = np.hstack((chan2[:, :1]+chan2[:, -1:],
                               chan2[:, 1:-1].reshape(-1, nchan-1, 2).sum(-1)))

            # current sample positions in stream
            isr = j*ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in range(isr[0]//ntw, isr[-1]//ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:, iw] += np.sum(power[isr//ntw == iw],
                                                   axis=0)

            if do_foldspec:
                tsample = dtsample*isr  # times since start

                for k in range(nchan):
                    if coherent:
                        t = tsample  # already dedispersed
                    else:
                        t = tsample - dt[k]  # dedispersed times

                    phase = phasepol(t)  # corresponding PSR phases
                    iphase = np.remainder(phase*ngate, ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k] += np.bincount(iphase, power[:, k], ngate)
                    icount[k] += np.bincount(iphase, None, ngate)

                ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1
                if (j+1)*ntbin//nt > ibin:  # last addition to bin?
                    # get normalised flux in each bin (where any were added)
                    nonzero = icount > 0
                    nfoldspec = np.where(nonzero, foldspec/icount, 0.)
                    # subtract phase average and store
                    nfoldspec -= np.where(nonzero,
                                          np.sum(nfoldspec, 1, keepdims=True) /
                                          np.sum(nonzero, 1, keepdims=True), 0)
                    foldspec2[:, :, ibin] = nfoldspec
                    # reset for next iteration
                    foldspec *= 0
                    icount *= 0

            nread = j + 1

    if verbose:
        # use a counter rather than the loop variable, which is undefined
        # when nt == 0 and one too high after an early break
        print('read {0:6d} out of {1:6d}'.format(nread, nt))

    if do_waterfall:
        # Subtract the per-channel mean of the filled samples, touching only
        # those samples.  The mask must select the non-zero entries.
        nonzero = waterfall != 0.
        waterfall -= np.where(nonzero,
                              np.sum(waterfall, 1, keepdims=True) /
                              np.sum(nonzero, 1, keepdims=True), 0.)

    return foldspec2, waterfall
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nhead, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100):
    """FFT ARO data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : numpy dtype or '4bit' or '1bit'
        way the data are stored in the file.  The record-size lookup below
        only has entries for ``np.int8`` and ``'4bit'``.
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top : bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nhead : int
        number of bytes to skip before reading (usually 0 for ARO)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref : float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of part of the file that is read (i.e., ignoring nhead)
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel'
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or 'very'
        whether to give some progress information (default: True);
        the string 'very' additionally prints per-record diagnostics
    progress_interval : int
        Ping every progress_interval sets

    Returns
    -------
    foldspec2 : array, shape (nchan, ngate, ntbin)
        normalised folded spectrum with the phase average subtracted
    waterfall : array, shape (nchan, nwsize)
        summed power, with the per-channel mean of the non-zero samples
        subtracted if ``do_waterfall`` is set
    """
    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt * ntint // ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (simple for ARO: 1 byte/sample)
    # double since we need to get ntint samples after FFT
    recsize = nchan * ntint * {np.int8: 2, '4bit': 1}[dtype]
    if verbose:
        print('Reading from {}'.format(fh1))

    if nhead > 0:
        if verbose:
            print('Skipping {0} bytes'.format(nhead))
        fh1.seek(nhead)

    # The power sum is float because np.bincount with weights returns
    # floats, which cannot be added in place to an integer array.
    foldspec = np.zeros((nchan, ngate), dtype=float)
    icount = np.zeros((nchan, ngate), dtype=int)

    dt1 = (1. / samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    dtsample = nchan * 2 * dt1

    # pre-calculate time delay due to dispersion in coarse channels
    freq = (fedge - rfftfreq(nchan * 2, dt1.value) * u.Hz
            if fedge_at_top
            else
            fedge + rfftfreq(nchan * 2, dt1.value) * u.Hz)
    # [::2] sets frequency channels to numerical recipes ordering
    dt = (dispersion_delay_constant * dm *
          (1. / freq[::2]**2 - 1. / fref**2)).to(u.s).value

    if dedisperse in {'coherent', 'by-channel'}:
        # pre-calculate required turns due to dispersion
        fcoh = (fedge - rfftfreq(nchan * 2 * ntint, dt1.value) * u.Hz
                if fedge_at_top
                else
                fedge + rfftfreq(nchan * 2 * ntint, dt1.value) * u.Hz)
        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            # _fref = np.round((fcoh * dtsample).to(1).value) / dtsample
            _fref = np.repeat(freq.value, ntint) * freq.unit
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy
        dang = (dispersion_delay_constant * dm * fcoh *
                (1. / _fref - 1. / fcoh)**2) * 360. * u.deg
        # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
        # for 0 and n need only real part, but for 1...n-1 need real, imag
        # so just get shifts for r[1], r[2], ..., r[n-1]
        dang = dang.to(u.rad).value[1:-1:2]
        dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

    nread = 0  # number of records fully processed
    for j in range(nt):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j + 1, nt, dtsample.value * j * ntint))   # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            # data just a series of bytes, each containing one 8 bit or
            # two 4-bit samples (set by dtype in caller)
            raw = fromfile(fh1, dtype, recsize)
        except (EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        vals = raw.astype(np.float32)
        if dedisperse in {'coherent', 'by-channel'}:
            fine = rfft(vals, axis=0, overwrite_x=True, **_fftargs)
            # view the interior (Re, Im) pairs as complex numbers
            fine_cmplx = fine[1:-1].view(np.complex64)
            fine_cmplx *= dd_coh  # this overwrites parts of fine, as intended
            vals = irfft(fine, axis=0, overwrite_x=True, **_fftargs)
            if verbose == 'very':
                print("... dedispersed", end="")

        chan2 = rfft(vals.reshape(-1, nchan * 2), axis=-1,
                     overwrite_x=True, **_fftargs)**2
        # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
        # re-order to Num.Rec. format: Re[0], Re[n/2], Re[1], ....
        power = np.hstack((chan2[:, :1] + chan2[:, -1:],
                           chan2[:, 1:-1].reshape(-1, nchan - 1, 2).sum(-1)))
        if verbose == 'very':
            print("... power", end="")

        # current sample positions in stream
        isr = j * ntint + np.arange(ntint)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in range(isr[0] // ntw, isr[-1] // ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[isr // ntw == iw],
                                               axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = dtsample.value * isr  # times since start

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                iphase = np.remainder(phase * ngate, ngate).astype(int)
                # sum and count samples by phase bin
                foldspec[k] += np.bincount(iphase, power[:, k], ngate)
                icount[k] += np.bincount(iphase, None, ngate)

            if verbose == 'very':
                print("... folded", end="")

            ibin = j * ntbin // nt  # bin in the time series: 0..ntbin-1
            if (j + 1) * ntbin // nt > ibin:  # last addition to bin?
                # get normalised flux in each bin (where any were added)
                nonzero = icount > 0
                nfoldspec = np.where(nonzero, foldspec / icount, 0.)
                # subtract phase average and store
                nfoldspec -= np.where(
                    nonzero,
                    np.sum(nfoldspec, 1, keepdims=True) /
                    np.sum(nonzero, 1, keepdims=True), 0)
                foldspec2[:, :, ibin] = nfoldspec
                # reset for next iteration
                foldspec *= 0
                icount *= 0
                if verbose == 'very':
                    print("... added", end="")

        if verbose == 'very':
            print("... done")

        nread = j + 1

    if verbose:
        # use a counter rather than the loop variable, which is undefined
        # when nt == 0 and one too high after an early break
        print('read {0:6d} out of {1:6d}'.format(nread, nt))

    if do_waterfall:
        # Subtract the per-channel mean of the filled samples, touching only
        # those samples.  The mask must select the non-zero entries.
        nonzero = waterfall != 0.
        waterfall -= np.where(
            nonzero,
            np.sum(waterfall, 1, keepdims=True) /
            np.sum(nonzero, 1, keepdims=True), 0.)

    return foldspec2, waterfall
def fold(fh, comm, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, rfi_filter_raw=None, rfi_filter_power=None,
         return_fits=False):
    """
    FFT data, fold by phase/time and make a waterfall series

    Folding is done from the position the file is currently in

    Parameters
    ----------
    fh : file handle
        handle to file holding voltage timeseries.  The code uses its
        ``nchan``, ``blocksize``, ``telescope``, ``tell()`` and
        ``seek_record_read()`` members, and optionally ``npol``,
        ``data_is_complex``, ``dtsample`` and ``frequencies``.
    comm : MPI communicator or None
        will use size, rank attributes
    samplerate : Quantity
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top : bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref : float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of the file that is read.
    dedisperse : None or string (default: incoherent).
        None, 'incoherent', 'coherent', 'by-channel'.
        Note: None really does nothing
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or int
        whether to give some progress information (default: True);
        values >= 2 print per-record diagnostics
    progress_interval : int
        Ping every progress_interval sets
    rfi_filter_raw : callable or None
        if given, called as ``raw, ok = rfi_filter_raw(raw)`` on each record
    rfi_filter_power : callable or None
        if given, called as ``power = rfi_filter_power(power, times)`` with
        the sample times of the record
    return_fits : bool (default: False)
        return a subint fits table for rank == 0 (None otherwise).
        Not used anywhere in this function body.

    Returns
    -------
    foldspec : array or None
        folded spectrum, shape (ntbin, nchan, ngate, npol**2); the last
        axis is dropped for a single polarisation.  None if not requested.
    icount : array or None
        number of non-zero samples per (ntbin, nchan, ngate) entry.
    waterfall : array or None
        summed power, shape (nwsize, nchan, npol**2), last axis dropped for
        a single polarisation.  None if not requested.
    """
    assert dedisperse in (None, 'incoherent', 'by-channel', 'coherent')
    need_fine_channels = dedisperse in ['by-channel', 'coherent']
    assert nchan % fh.nchan == 0
    if dedisperse == 'by-channel' and fh.nchan > 1:
        oversample = nchan // fh.nchan
        assert ntint % oversample == 0
    else:
        oversample = 1

    if dedisperse == 'coherent' and fh.nchan > 1:
        warnings.warn("Doing coherent dedispersion on channelized data. "
                      "May get artefacts!")

    if comm is None:
        mpi_rank = 0
        mpi_size = 1
    else:
        mpi_rank = comm.rank
        mpi_size = comm.size

    npol = getattr(fh, 'npol', 1)
    assert npol == 1 or npol == 2
    if verbose > 1 and mpi_rank == 0:
        print("Number of polarisations={}".format(npol))

    # initialize folded spectrum and waterfall
    # TODO: use estimated number of points to set dtype
    if do_foldspec:
        foldspec = np.zeros((ntbin, nchan, ngate, npol**2), dtype=np.float32)
        icount = np.zeros((ntbin, nchan, ngate), dtype=np.int32)
    else:
        foldspec = None
        icount = None

    if do_waterfall:
        nwsize = nt*ntint//ntw//oversample
        waterfall = np.zeros((nwsize, nchan, npol**2), dtype=np.float64)
    else:
        waterfall = None

    if verbose and mpi_rank == 0:
        print('Reading from {}'.format(fh))

    # number of blocks the file pointer is already past the start
    nskip = fh.tell()/fh.blocksize
    if nskip > 0:
        if verbose and mpi_rank == 0:
            print('Starting {0} blocks = {1} bytes out from start.'
                  .format(nskip, nskip*fh.blocksize))

    dt1 = (1./samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    if fh.telescope == 'lofar':
        dtsample = fh.dtsample
    else:
        dtsample = nchan // oversample * 2 * dt1
    tstart = dtsample * ntint * nskip

    # pre-calculate time delay due to dispersion in coarse channels
    # for channelized data, frequencies are known
    tb = -1. if fedge_at_top else +1.  # sign of frequency offsets from edge
    if fh.nchan == 1:
        if getattr(fh, 'data_is_complex', False):
            # for complex data, really each complex sample consists of
            # 2 real ones, so multiply dt1 by 2.
            freq = fedge + tb * fftfreq(nchan, 2.*dt1.value) * u.Hz
            if dedisperse == 'coherent':
                fcoh = fedge + tb * fftfreq(nchan*ntint, 2.*dt1.value) * u.Hz
                fcoh.shape = (-1, 1)
            elif dedisperse == 'by-channel':
                fcoh = freq + (tb * fftfreq(
                    ntint, 2.*dtsample.value) * u.Hz)[:, np.newaxis]
        else:
            freq = fedge + tb * rfftfreq(nchan*2, dt1.value)[::2] * u.Hz
            if dedisperse == 'coherent':
                fcoh = fedge + tb * rfftfreq(nchan*ntint*2,
                                             dt1.value)[::2] * u.Hz
                fcoh.shape = (-1, 1)
            elif dedisperse == 'by-channel':
                fcoh = freq + tb * fftfreq(
                    ntint, dtsample.value)[:, np.newaxis] * u.Hz
        freq_in = freq

    else:
        # input frequencies may not be the ones going out
        freq_in = fh.frequencies
        if oversample == 1:
            freq = freq_in
        else:
            freq = (freq_in[:, np.newaxis] + tb * u.Hz *
                    rfftfreq(oversample*2, dtsample.value/2.)[::2])
        # same as fine = rfftfreq(2*ntint, dtsample.value/2.)[::2]
        fcoh = freq_in[np.newaxis, :] + tb * u.Hz * rfftfreq(
            ntint*2, dtsample.value/2.)[::2, np.newaxis]

    # output channels are stored sorted by frequency
    ifreq = freq.ravel().argsort()

    # pre-calculate time offsets in (input) channelized streams
    dt = dispersion_delay_constant * dm * (1./freq_in**2 - 1./fref**2)

    if need_fine_channels:
        # pre-calculate required turns due to dispersion.
        #
        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            _fref = freq_in[np.newaxis, :]
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy
        dang = (dispersion_delay_constant * dm * fcoh *
                (1./_fref-1./fcoh)**2) * u.cycle
        with u.set_enabled_equivalencies(u.dimensionless_angles()):
            dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)
        # add dimension for polarisation
        dd_coh = dd_coh[..., np.newaxis]

    # Calculate the part of the whole file this node should handle.
    size_per_node = (nt-1)//mpi_size + 1
    start_block = mpi_rank*size_per_node
    end_block = min((mpi_rank+1)*size_per_node, nt)
    for j in range(start_block, end_block):
        if verbose and j % progress_interval == 0:
            print('#{:4d}/{:4d} is doing {:6d}/{:6d} [={:6d}/{:6d}]; '
                  'time={:18.12f}'
                  .format(mpi_rank, mpi_size, j+1, nt,
                          j-start_block+1, end_block-start_block,
                          (tstart+dtsample*j*ntint).value))  # time since start

        # Just in case numbers were set wrong -- break if file ends;
        # better keep at least the work done.
        try:
            raw = fh.seek_record_read(int((nskip+j)*fh.blocksize),
                                      fh.blocksize)
        except (EOFError, IOError) as exc:
            print("Hit {0!r}; writing data collected.".format(exc))
            break
        if verbose >= 2:
            print("#{:4d}/{:4d} read {} items"
                  .format(mpi_rank, mpi_size, raw.size), end="")

        if npol == 2:  # multiple polarisations
            # view the structured record as its first field's base dtype;
            # dict views cannot be indexed on Python 3, hence next(iter()).
            raw = raw.view(next(iter(raw.dtype.fields.values()))[0])

        if fh.nchan == 1:  # raw.shape=(ntint*npol)
            raw = raw.reshape(-1, npol)
        else:  # raw.shape=(ntint, nchan*npol)
            raw = raw.reshape(-1, fh.nchan, npol)

        if rfi_filter_raw is not None:
            raw, ok = rfi_filter_raw(raw)
            if verbose >= 2:
                print("... raw RFI (zap {0}/{1})"
                      .format(np.count_nonzero(~ok), ok.size), end="")

        if np.can_cast(raw.dtype, np.float32):
            vals = raw.astype(np.float32)
        else:
            assert raw.dtype.kind == 'c'
            vals = raw

        if fh.nchan == 1:
            # have real-valued time stream of complex baseband
            # if we need some coherentdedispersion, do FT of whole thing,
            # otherwise to output channels
            if raw.dtype.kind == 'c':
                ftchan = len(vals) if dedisperse == 'coherent' else nchan
                vals = fft(vals.reshape(-1, ftchan, npol), axis=1,
                           overwrite_x=True, **_fftargs)
            else:  # real data
                ftchan = len(vals) // 2 if dedisperse == 'coherent' else nchan
                vals = rfft(vals.reshape(-1, ftchan*2, npol), axis=1,
                            overwrite_x=True, **_fftargs)
                if vals.dtype.kind == 'f':  # this depends on version, sigh.
                    # rfft: Re[0], Re[1], Im[1],.,Re[n/2-1], Im[n/2-1], Re[n/2]
                    # re-order to normal fft format (like Numerical Recipes):
                    # Re[0], Re[n], Re[1], Im[1], .... (channel 0 junk anyway)
                    vals = (np.hstack((vals[:, :1], vals[:, -1:],
                                       vals[:, 1:-1]))
                            .reshape(-1, ftchan, 2 * npol))
                    if npol == 2:  # reorder pol & real/imag
                        # Swap columns 1 and 2 in one assignment.  The
                        # fancy-indexed right-hand side is a copy; going
                        # through a plain slice would alias vals and leave
                        # both columns holding the same data.
                        vals[:, :, [1, 2]] = vals[:, :, [2, 1]]
                        vals = vals.reshape(-1, ftchan, npol, 2)
                else:
                    # complex output: pack Re[n/2] into Im of channel 0
                    vals[:, 0] = vals[:, 0].real + 1j * vals[:, -1].real
                    vals = vals[:, :-1]

                vals = vals.view(np.complex64).reshape(-1, ftchan, npol)

            # for incoherent, vals.shape=(ntint, nchan, npol)
            # for others, (1, ntint*nchan, npol) -> (ntint*nchan, 1, npol)
            if need_fine_channels:
                if dedisperse == 'by-channel':
                    fine = fft(vals, axis=0, overwrite_x=True, **_fftargs)
                else:
                    fine = vals.reshape(-1, 1, npol)

        else:  # data already channelized
            if need_fine_channels:
                fine = fft(vals, axis=0, overwrite_x=True, **_fftargs)
                # have fine.shape=(ntint, fh.nchan, npol)

        if need_fine_channels:
            # Dedisperse.
            fine *= dd_coh

            # now fine.shape=(ntint, nchan, npol) w/ nchan=1 for coherent
            vals = ifft(fine, axis=0, overwrite_x=True, **_fftargs)
            if dedisperse == 'coherent' and nchan > 1 and fh.nchan == 1:
                # final FT to get requested channels
                vals = vals.reshape(-1, nchan, npol)
                vals = fft(vals, axis=1, overwrite_x=True, **_fftargs)
            elif dedisperse == 'by-channel' and oversample > 1:
                vals = vals.reshape(-1, oversample, fh.nchan, npol)
                vals = fft(vals, axis=1, overwrite_x=True, **_fftargs)
                vals = vals.transpose(0, 2, 1, 3).reshape(-1, nchan, npol)

        # vals[time, chan, pol]
        if verbose >= 2:
            print("... dedispersed", end="")

        if npol == 1:
            power = vals.real**2 + vals.imag**2
        else:
            # auto- and cross-products of the two polarisations
            p0 = vals[..., 0]
            p1 = vals[..., 1]
            power = np.empty(vals.shape[:-1] + (4,), np.float32)
            power[..., 0] = p0.real**2 + p0.imag**2
            power[..., 1] = p0.real*p1.real + p0.imag*p1.imag
            power[..., 2] = p0.imag*p1.real - p0.real*p1.imag
            power[..., 3] = p1.real**2 + p1.imag**2

        if verbose >= 2:
            print("... power", end="")

        # current sample positions and corresponding time in stream
        isr = j*(ntint // oversample) + np.arange(ntint // oversample)
        tsr = (isr*dtsample*oversample)[:, np.newaxis]

        if rfi_filter_power is not None:
            power = rfi_filter_power(power, tsr.squeeze())
            # diagnostic only; guarded like the other per-record messages
            if verbose >= 2:
                print("... power RFI", end="")

        # correct for delay if needed
        if dedisperse in ['incoherent', 'by-channel']:
            # tsample.shape=(ntint/oversample, nchan_in)
            tsr = tsr - dt

        if do_waterfall:
            # waterfall bin of every (delay-corrected) sample
            iw = np.round((tsr / dtsample / oversample).to(1)
                          .value / ntw).astype(int)
            for k, kfreq in enumerate(ifreq):  # sort in frequency while at it
                iwk = iw[:, (0 if iw.shape[1] == 1 else kfreq // oversample)]
                iwk = np.clip(iwk, 0, nwsize-1, out=iwk)
                iwkmin = iwk.min()
                iwkmax = iwk.max()+1
                for ipow in range(npol**2):
                    waterfall[iwkmin:iwkmax, k, ipow] += np.bincount(
                        iwk-iwkmin, power[:, kfreq, ipow], iwkmax-iwkmin)
            if verbose >= 2:
                print("... waterfall", end="")

        if do_foldspec:
            ibin = (j*ntbin) // nt  # bin in the time series: 0..ntbin-1

            # times and cycles since start time of observation.
            tsample = tstart + tsr
            phase = (phasepol(tsample.to(u.s).value.ravel())
                     .reshape(tsample.shape))
            # corresponding PSR phases
            iphase = np.remainder(phase*ngate, ngate).astype(int)

            for k, kfreq in enumerate(ifreq):  # sort in frequency while at it
                iph = iphase[:, (0 if iphase.shape[1] == 1
                                 else kfreq // oversample)]
                # sum and count samples by phase bin
                for ipow in range(npol**2):
                    foldspec[ibin, k, :, ipow] += np.bincount(
                        iph, power[:, kfreq, ipow], ngate)
                # Count only samples with non-zero power.  An unweighted
                # bincount keeps the result integer, so it can be added in
                # place to the int32 counter (a weighted bincount is float).
                icount[ibin, k, :] += np.bincount(
                    iph[power[:, kfreq, 0] != 0.], minlength=ngate)

            if verbose >= 2:
                print("... folded", end="")

        if verbose >= 2:
            print("... done")

    # A final "read N out of M" summary was removed here as a workaround:
    # it caused "Referenced before assignment" errors with JB data.

    if npol == 1:
        # drop the length-1 polarisation axis
        if do_foldspec:
            foldspec = foldspec.reshape(foldspec.shape[:-1])
        if do_waterfall:
            waterfall = waterfall.reshape(waterfall.shape[:-1])

    return foldspec, icount, waterfall
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, rfi_filter_raw=None, rfi_filter_power=None,
         comm=None):
    """FFT ARO data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : numpy dtype or '4bit'
        way the data are stored in the file.  Only ``np.int8`` (one sample
        per byte) and ``'4bit'`` (two samples per byte) have an entry in the
        record-size lookup; anything else raises ``KeyError``.
    samplerate : Quantity
        rate at which samples were originally taken and thus double
        the band width (frequency units)
    fedge : Quantity
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nskip : int
        number of records (nchan * ntint * 2 samples each) to skip
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float or Quantity
        dispersion measure of pulsar, used to correct for ism delay
        (column number density); it is multiplied by
        ``dispersion_delay_constant`` and the result converted to seconds
    fref: Quantity
        reference frequency for dispersion measure (frequency units)
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of part of the file that is read (i.e., ignoring nhead)
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel'
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or 'very'
        whether to give some progress information (default: True);
        'very' additionally prints a per-set trace of the processing steps
    progress_interval : int
        Ping every progress_interval sets
    rfi_filter_raw : callable, optional
        if given, called as ``rfi_filter_raw(raw)`` on every record read;
        its return value replaces the record
    rfi_filter_power : callable, optional
        if given, called as ``rfi_filter_power(power)`` on the power spectra
        of every set; its return value replaces them
    comm : MPI communicator (default None)
        if given, its ``rank`` and ``size`` attributes are used to
        distribute the sets round-robin over the processes

    Returns
    -------
    foldspec : ndarray, shape (nchan, ngate, ntbin)
        summed power per channel, phase gate and time bin
    icount : ndarray of int64, shape (nchan, ngate, ntbin)
        number of non-zero power samples added to each foldspec element
    waterfall : ndarray, shape (nchan, nt*ntint//ntw)
        power summed over every ntw consecutive time samples

    Notes
    -----
    All three arrays are always allocated and returned; with
    ``do_waterfall`` or ``do_foldspec`` False the corresponding array(s)
    simply remain zero.  With MPI each process returns only its own partial
    sums; nothing is reduced over ``comm`` here.
    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # initialize folded spectrum and waterfall
    foldspec = np.zeros((nchan, ngate, ntbin))
    icount = np.zeros((nchan, ngate, ntbin), dtype=np.int64)
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (simple for ARO: 1 byte/sample)
    # double since we need to get ntint samples after FFT
    recsize = nchan*ntint*{np.int8: 2, '4bit': 1}[dtype]
    if verbose:
        print('Reading from {}'.format(fh1))

    if nskip > 0:
        if verbose:
            print('Skipping {0} records = {1} bytes'
                  .format(nskip, nskip*recsize))
        # If MPI threading, the threads hop over one-another
        # and seeking is done in for-loop.
        if size == 1:
            fh1.seek(nskip * recsize)

    dt1 = (1./samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    dtsample = nchan * 2 * dt1
    tstart = dtsample * ntint * nskip

    # pre-calculate time delay due to dispersion in coarse channels
    freq = (fedge - rfftfreq(nchan*2, dt1.value) * u.Hz
            if fedge_at_top
            else
            fedge + rfftfreq(nchan*2, dt1.value) * u.Hz)
    # [::2] sets frequency channels to numerical recipes ordering
    dt = (dispersion_delay_constant * dm *
          (1./freq[::2]**2 - 1./fref**2)).to(u.s).value

    # both of these modes rotate the phases of a fine FFT over a whole set
    fine_dedisperse = dedisperse in {'coherent', 'by-channel'}
    if fine_dedisperse:
        # pre-calculate required turns due to dispersion
        fcoh = (fedge - rfftfreq(nchan*2*ntint, dt1.value) * u.Hz
                if fedge_at_top
                else
                fedge + rfftfreq(nchan*2*ntint, dt1.value) * u.Hz)
        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            # _fref = np.round((fcoh * dtsample).to(1).value) / dtsample
            _fref = np.repeat(freq.value, ntint) * freq.unit
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy)
        dang = (dispersion_delay_constant * dm * fcoh *
                (1./_fref-1./fcoh)**2) * 360. * u.deg
        # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
        # for 0 and n need only real part, but for 1...n-1 need real, imag
        # so just get shifts for r[1], r[2], ..., r[n-1]
        dang = dang.to(u.rad).value[1:-1:2]
        dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

    # Keep j defined for the summary print below even when this process
    # gets no sets at all (rank >= nt); it then reports 0 sets read.
    j = -1
    for j in range(rank, nt, size):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j+1, nt, (tstart+dtsample*j*ntint).value))   # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            # data just a series of bytes, each containing one 8 bit or
            # two 4-bit samples (set by dtype in caller)
            if size > 1:
                fh1.seek((j+nskip)*recsize)
            raw = fromfile(fh1, dtype, recsize)
        except (EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        if rfi_filter_raw:
            raw = rfi_filter_raw(raw)
            # trace fragment only belongs to the 'very' verbose output
            if verbose == 'very':
                print("... raw RFI", end="")

        vals = raw.astype(np.float32)
        if fine_dedisperse:
            fine = rfft(vals, axis=0, overwrite_x=True, **_fftargs)
            fine_cmplx = fine[1:-1].view(np.complex64)
            fine_cmplx *= dd_coh  # this overwrites parts of fine, as intended
            vals = irfft(fine, axis=0, overwrite_x=True, **_fftargs)
            if verbose == 'very':
                print("... dedispersed", end="")

        chan2 = rfft(vals.reshape(-1, nchan*2), axis=-1,
                     overwrite_x=True, **_fftargs)**2
        # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
        # re-order to Num.Rec. format: Re[0], Re[n/2], Re[1], ....
        power = np.hstack((chan2[:, :1] + chan2[:, -1:],
                           chan2[:, 1:-1].reshape(-1, nchan-1, 2).sum(-1)))
        if verbose == 'very':
            print("... power", end="")

        if rfi_filter_power:
            power = rfi_filter_power(power)
            if verbose == 'very':
                print("... power RFI", end="")

        # current sample positions in stream
        isr = j*ntint + np.arange(ntint)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in range(isr[0]//ntw, isr[-1]//ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[isr//ntw == iw],
                                               axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr*dtsample).value  # times since start
            ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1
            for k in range(nchan):
                # NOTE(review): every mode other than 'coherent' -- including
                # dedisperse=None -- gets the per-channel delay removed here;
                # confirm that this is intended for None.
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                # builtin int: the np.int alias no longer exists in NumPy
                iphase = np.remainder(phase*ngate, ngate).astype(int)
                # sum and count samples by phase bin
                foldspec[k, :, ibin] += np.bincount(iphase, power[:, k],
                                                    ngate)
                icount[k, :, ibin] += np.bincount(iphase, power[:, k] != 0.,
                                                  ngate)

            if verbose == 'very':
                print("... folded", end="")

        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    return foldspec, icount, waterfall