def _fourier_cross(self, lc1, lc2): """ Fourier transform the two light curves, then compute the cross spectrum. Computed as CS = lc1 x lc2* (where lc2 is the one that gets complex-conjugated) Parameters ---------- lc1: :class:`stingray.Lightcurve` object One light curve to be Fourier transformed. Ths is the band of interest or channel of interest. lc2: :class:`stingray.Lightcurve` object Another light curve to be Fourier transformed. This is the reference band. Returns ------- fr: numpy.ndarray The squared absolute value of the Fourier amplitudes """ fourier_1 = fft(lc1.counts) # do Fourier transform 1 fourier_2 = fft(lc2.counts) # do Fourier transform 2 freqs = fftfreq(lc1.n, lc1.dt) cross = np.multiply(fourier_1[freqs > 0], np.conj(fourier_2[freqs > 0])) return freqs[freqs > 0], cross
def get_fft_freq(self, freq, ntint, dm):
    """
    Build the complex phase factors ``exp(-1j * dang)`` for a set of
    channel frequencies.

    Despite the name, the return value is not a frequency grid: it is the
    array of unit-modulus phasors computed from the dispersion term below.

    Parameters
    ----------
    freq : numpy.ndarray
        Channel frequencies; indexed with ``np.newaxis`` so it must be an
        array (presumably 1-D, one entry per channel -- confirm with caller).
    ntint : int
        Number of points passed to ``fftfreq``; sets the length of the
        first axis of the result.
    dm : float
        Dispersion measure, used as a plain multiplicative factor here.

    Returns
    -------
    numpy.ndarray
        ``complex64`` array with a leading axis of length ``ntint``
        broadcast against ``freq``.
    """
    # sample spacing handed to fftfreq, derived from instance settings
    sample_time = 2 * self.nfreq * self.dt
    fine_offsets = fftfreq(ntint, sample_time)

    # fine frequency of every FFT bin within every channel
    fine_freq = freq - fine_offsets[:, np.newaxis]
    ref_freq = freq[np.newaxis]

    inverse_gap = 1. / ref_freq - 1. / fine_freq
    phase = (self.dispersion_delay_constant * dm * fine_freq
             * inverse_gap**2)

    return np.exp(-1j * phase).astype(np.complex64)
def get_fft_freq(self, freq, ntint, dm):
    """
    Return the ``complex64`` phasors ``exp(-1j * dang)`` for each FFT bin
    of each channel.

    ``dang`` is ``dispersion_delay_constant * dm * fcoh *
    (1/freq - 1/fcoh)**2`` where ``fcoh`` is the channel frequency minus
    the ``fftfreq`` offset of the bin. The function name is historical;
    no frequency array is returned.

    Parameters
    ----------
    freq : numpy.ndarray
        Channel frequencies (must support ``freq[np.newaxis]``).
    ntint : int
        Number of FFT bins; length of the leading axis of the result.
    dm : float
        Dispersion measure multiplier.

    Returns
    -------
    numpy.ndarray
        Array of dtype ``complex64`` whose first axis has length ``ntint``.
    """
    step = 2 * self.nfreq * self.dt
    bin_offsets = fftfreq(ntint, step)[:, np.newaxis]

    # per-bin frequency and the per-channel reference it is compared to
    bin_freq = freq - bin_offsets
    channel_freq = freq[np.newaxis]

    delta = 1. / channel_freq - 1. / bin_freq
    angle = self.dispersion_delay_constant * dm * bin_freq * delta**2

    phasors = np.exp(-1j * angle)
    return phasors.astype(np.complex64)
def fold(fh, comm, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, rfi_filter_raw=None, rfi_filter_power=None,
         return_fits=False):
    """
    FFT data, fold by phase/time and make a waterfall series

    Folding is done from the position the file is currently in

    Parameters
    ----------
    fh : file handle
        handle to file holding voltage timeseries.  The attributes
        ``nchan``, ``blocksize``, ``telescope`` and the methods ``tell`` and
        ``seek_record_read`` are used; ``npol``, ``data_is_complex``,
        ``dtsample`` and ``frequencies`` are read when relevant.
    comm: MPI communicator or None
        will use size, rank attributes
    samplerate : Quantity
        rate at which samples were originally taken and thus double
        the band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of the file that is read.
    dedisperse : None or string (default: incoherent).
        None, 'incoherent', 'coherent', 'by-channel'.
        Note: None really does nothing
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or int
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    rfi_filter_raw : callable or None
        if given, called as ``raw, ok = rfi_filter_raw(raw)`` on every block
        right after it has been reshaped.
    rfi_filter_power : callable or None
        if given, called as ``power = rfi_filter_power(power)`` on the
        detected power of every block.
    return_fits : bool (default: False)
        accepted for interface compatibility; this implementation never
        reads it and no FITS table is produced.

    Returns
    -------
    foldspec : numpy.ndarray or None
        folded spectrum, shape (ntbin, nchan, ngate[, npol**2]); None if
        ``do_foldspec`` is False.  The trailing axis is dropped for npol=1.
    icount : numpy.ndarray or None
        number of non-zero samples summed into each (ntbin, nchan, ngate)
        bin; None if ``do_foldspec`` is False.
    waterfall : numpy.ndarray or None
        summed power, shape (nt*ntint//ntw, nchan[, npol**2]); None if
        ``do_waterfall`` is False.

    Raises
    ------
    ValueError
        if coherent dedispersion is requested on channelized data.
    AssertionError
        if ``dedisperse`` is not one of the supported values, ``nchan`` is
        not a multiple of ``fh.nchan``, or the number of polarisations is
        not 1 or 2.
    """
    assert dedisperse in (None, 'incoherent', 'by-channel', 'coherent')
    assert nchan % fh.nchan == 0
    if dedisperse == 'by-channel':
        oversample = nchan // fh.nchan
        assert ntint % oversample == 0
    else:
        oversample = 1

    if dedisperse == 'coherent' and fh.nchan > 1:
        raise ValueError("For coherent dedispersion, data must be "
                         "unchannelized before folding.")

    if comm is None:
        mpi_rank = 0
        mpi_size = 1
    else:
        mpi_rank = comm.rank
        mpi_size = comm.size

    npol = getattr(fh, 'npol', 1)
    assert npol == 1 or npol == 2
    if verbose > 1 and mpi_rank == 0:
        print("Number of polarisations={}".format(npol))

    # initialize folded spectrum and waterfall
    # TODO: use estimated number of points to set dtype
    if do_foldspec:
        foldspec = np.zeros((ntbin, nchan, ngate, npol**2), dtype=np.float32)
        icount = np.zeros((ntbin, nchan, ngate), dtype=np.int32)
    else:
        foldspec = None
        icount = None

    if do_waterfall:
        nwsize = nt * ntint // ntw
        waterfall = np.zeros((nwsize, nchan, npol**2), dtype=np.float64)
    else:
        waterfall = None

    if verbose and mpi_rank == 0:
        print('Reading from {}'.format(fh))

    # number of blocks already consumed before folding starts
    nskip = fh.tell() / fh.blocksize
    if nskip > 0:
        if verbose and mpi_rank == 0:
            print('Starting {0} blocks = {1} bytes out from start.'.format(
                nskip, nskip * fh.blocksize))

    dt1 = (1. / samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    if fh.telescope == 'lofar':
        dtsample = fh.dtsample
    else:
        dtsample = nchan // oversample * 2 * dt1
    tstart = dtsample * ntint * nskip

    # pre-calculate time delay due to dispersion in coarse channels
    # for channelized data, frequencies are known
    if fh.nchan == 1:
        if getattr(fh, 'data_is_complex', False):
            # for complex data, really each complex sample consists of
            # 2 real ones, so multiply dt1 by 2.
            if fedge_at_top:
                freq = fedge - fftfreq(nchan, 2. * dt1.value) * u.Hz
            else:
                freq = fedge + fftfreq(nchan, 2. * dt1.value) * u.Hz
        else:
            if fedge_at_top:
                freq = fedge - rfftfreq(nchan * 2, dt1.value)[::2] * u.Hz
            else:
                freq = fedge + rfftfreq(nchan * 2, dt1.value)[::2] * u.Hz
        freq_in = freq
    else:
        # input frequencies may not be the ones going out
        freq_in = fh.frequencies
        if oversample == 1:
            freq = freq_in
        else:
            if fedge_at_top:
                freq = (freq_in[:, np.newaxis] -
                        u.Hz * fftfreq(oversample, dtsample.value))
            else:
                freq = (freq_in[:, np.newaxis] +
                        u.Hz * fftfreq(oversample, dtsample.value))

    # output channels are written in increasing-frequency order
    ifreq = freq.ravel().argsort()

    # pre-calculate time offsets in (input) channelized streams
    dt = dispersion_delay_constant * dm * (1. / freq_in**2 - 1. / fref**2)

    if dedisperse in ['coherent', 'by-channel']:
        # pre-calculate required turns due to dispersion
        if fedge_at_top:
            fcoh = (freq_in[np.newaxis, :] -
                    u.Hz * fftfreq(ntint, dtsample.value)[:, np.newaxis])
        else:
            fcoh = (freq_in[np.newaxis, :] +
                    u.Hz * fftfreq(ntint, dtsample.value)[:, np.newaxis])

        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            _fref = freq_in[np.newaxis, :]
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy
        dang = (dispersion_delay_constant * dm * fcoh *
                (1. / _fref - 1. / fcoh)**2) * u.cycle
        with u.set_enabled_equivalencies(u.dimensionless_angles()):
            dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

        # add dimension for polarisation
        dd_coh = dd_coh[..., np.newaxis]

    # Calculate the part of the whole file this node should handle.
    size_per_node = (nt - 1) // mpi_size + 1
    start_block = mpi_rank * size_per_node
    end_block = min((mpi_rank + 1) * size_per_node, nt)
    for j in range(start_block, end_block):
        if verbose and j % progress_interval == 0:
            print('#{:4d}/{:4d} is doing {:6d}/{:6d} [={:6d}/{:6d}]; '
                  'time={:18.12f}'.format(
                      mpi_rank, mpi_size, j + 1, nt,
                      j - start_block + 1, end_block - start_block,
                      (tstart + dtsample * j * ntint).value))  # time since start

        # Just in case numbers were set wrong -- break if file ends;
        # better keep at least the work done.
        try:
            raw = fh.seek_record_read(int((nskip + j) * fh.blocksize),
                                      fh.blocksize)
        except (EOFError, IOError) as exc:
            print("Hit {0!r}; writing data collected.".format(exc))
            break
        if verbose >= 2:
            print("#{:4d}/{:4d} read {} items".format(mpi_rank, mpi_size,
                                                      raw.size), end="")

        if npol == 2:  # multiple polarisations
            # View the structured record as its scalar field type.  Indexing
            # the dtype by position works on Python 2 and 3 alike, unlike
            # subscripting ``fields.values()``, which is a view on Python 3.
            raw = raw.view(raw.dtype[0])

        if fh.nchan == 1:  # raw.shape=(ntint*npol)
            raw = raw.reshape(-1, npol)
        else:  # raw.shape=(ntint, nchan*npol)
            raw = raw.reshape(-1, fh.nchan, npol)

        if rfi_filter_raw is not None:
            raw, ok = rfi_filter_raw(raw)
            if verbose >= 2:
                print("... raw RFI (zap {0}/{1})".format(
                    np.count_nonzero(~ok), ok.size), end="")

        if np.can_cast(raw.dtype, np.float32):
            vals = raw.astype(np.float32)
        else:
            assert raw.dtype.kind == 'c'
            vals = raw

        if fh.nchan == 1:
            # have real-valued time stream of complex baseband
            # if we need some coherentdedispersion, do FT of whole thing,
            # otherwise to output channels
            if raw.dtype.kind == 'c':
                ftchan = nchan if dedisperse == 'incoherent' else len(vals)
                vals = fft(vals.reshape(-1, ftchan, npol), axis=1,
                           overwrite_x=True, **_fftargs)
            else:  # real data
                ftchan = (nchan if dedisperse == 'incoherent'
                          else len(vals) // 2)
                vals = rfft(vals.reshape(-1, ftchan * 2, npol), axis=1,
                            overwrite_x=True, **_fftargs)
                # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
                # re-order to normal fft format (like Numerical Recipes):
                # Re[0], Re[n], Re[1], Im[1], .... (channel 0 is junk anyway)
                vals = np.hstack(
                    (vals[:, 0], vals[:, -1], vals[:, 1:-1])
                ).view(np.complex64)
            # for incoherent, vals.shape=(ntint, nchan, npol) -> OK
            # for others, have (1, ntint*nchan, npol)
            # reshape(nchan, ntint) gives rough as slowly varying -> .T
            if dedisperse != 'incoherent':
                fine = vals.reshape(nchan, -1, npol).transpose(1, 0, 2)
                # now have fine.shape=(ntint, nchan, npol)
        else:  # data already channelized
            if dedisperse == 'by-channel':
                fine = fft(vals, axis=0, overwrite_x=True, **_fftargs)
                # have fine.shape=(ntint, fh.nchan, npol)

        if dedisperse in ['coherent', 'by-channel']:
            fine *= dd_coh
            # rechannelize to output channels
            if oversample > 1 and dedisperse == 'by-channel':
                # fine.shape=(ntint*oversample, chan_in, npol)
                #           =(coarse,fine,fh.chan, npol)
                #  -> reshape(oversample, ntint, fh.nchan, npol)
                # want (ntint=fine, fh.nchan, oversample, npol) -> .transpose
                fine = (fine.reshape(oversample, -1, fh.nchan, npol)
                        .transpose(1, 2, 0, 3)
                        .reshape(-1, nchan, npol))

            # now, for both, fine.shape=(ntint, nchan, npol)
            vals = ifft(fine, axis=0, overwrite_x=True, **_fftargs)
            # vals[time, chan, pol]
            if verbose >= 2:
                print("... dedispersed", end="")

        if npol == 1:
            power = vals.real**2 + vals.imag**2
        else:
            # auto- and cross-products of the two polarisations
            p0 = vals[..., 0]
            p1 = vals[..., 1]
            power = np.empty(vals.shape[:-1] + (4, ), np.float32)
            power[..., 0] = p0.real**2 + p0.imag**2
            power[..., 1] = p0.real * p1.real + p0.imag * p1.imag
            power[..., 2] = p0.imag * p1.real - p0.real * p1.imag
            power[..., 3] = p1.real**2 + p1.imag**2

        if verbose >= 2:
            print("... power", end="")

        if rfi_filter_power is not None:
            power = rfi_filter_power(power)
            # progress output is only wanted at the same verbosity level as
            # every other per-block message in this loop
            if verbose >= 2:
                print("... power RFI", end="")

        # current sample positions in stream
        isr = j * (ntint // oversample) + np.arange(ntint // oversample)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in range(isr[0] // ntw, isr[-1] // ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[iw, :] += np.sum(power[isr // ntw == iw],
                                               axis=0)[ifreq]
            if verbose >= 2:
                print("... waterfall", end="")

        if do_foldspec:
            ibin = (j * ntbin) // nt  # bin in the time series: 0..ntbin-1

            # times since start
            tsample = (tstart + isr * dtsample * oversample)[:, np.newaxis]
            # correct for delay if needed
            if dedisperse in ['incoherent', 'by-channel']:
                # tsample.shape=(ntint/oversample, nchan_in)
                tsample = tsample - dt

            phase = (phasepol(tsample.to(u.s).value.ravel())
                     .reshape(tsample.shape))
            # corresponding PSR phases; plain ``int`` is what the removed
            # ``np.int`` alias always meant
            iphase = np.remainder(phase * ngate, ngate).astype(int)

            for k, kfreq in enumerate(ifreq):  # sort in frequency while at it
                iph = iphase[:, (0 if iphase.shape[1] == 1
                                 else kfreq // oversample)]
                # sum and count samples by phase bin
                for ipow in range(npol**2):
                    foldspec[ibin, k, :, ipow] += np.bincount(
                        iph, power[:, kfreq, ipow], ngate)
                icount[ibin, k, :] += np.bincount(
                    iph, power[:, kfreq, 0] != 0., ngate)

            if verbose >= 2:
                print("... folded", end="")

        if verbose >= 2:
            print("... done")

    # NOTE: a final per-rank "read j+1 out of nt" summary used to be printed
    # here; it was disabled as a workaround because it caused
    # "Referenced before assignment" errors with JB data.

    if npol == 1:
        # drop the length-1 polarisation axis
        if do_foldspec:
            foldspec = foldspec.reshape(foldspec.shape[:-1])
        if do_waterfall:
            waterfall = waterfall.reshape(waterfall.shape[:-1])

    return foldspec, icount, waterfall
def correlate(fh1, fh2, dm, nchan, ngate, ntbin, nt, ntw,
              dedisperse='incoherent', rfi_filter_raw=None, fref=_fref,
              save_xcorr=True, do_foldspec=True, phasepol=None,
              do_waterfall=True, t0=None, t1=None, comm=None, verbose=2):
    """
    Cross-correlate two data streams, optionally folding the result and
    building a waterfall.

    Both file handles are modified in place: per-stream settings
    (``prop_delay``, ``dt1``, ``this_nskip``, ``thisfft``, ``thisifft``,
    ``thisfftfreq``, ``freq``, ``freqsort``, ``dt`` and, depending on
    ``dedisperse``, ``ndt`` or ``dd_coh``) are stored as attributes on them.

    Parameters
    ----------
    fh1 : file handle of first data stream
    fh2 : file handle of second data stream
    dm : dispersion measure used for the dispersion delays
    nchan : number of channels to form for unchannelized (fh.nchan == 1)
        data; also the maximum channel count of the saved dataset
    ngate, ntbin : number of phase and time bins of the folded spectrum
    nt : number of read steps, used to size the waterfall and to map a
        read step onto a time bin
    ntw : number of time samples to combine per waterfall column
    dedisperse : 'incoherent', 'coherent' or 'by-channel'; any other value
        skips dedispersion
    rfi_filter_raw : callable or None
        if given, called as ``raw, ok = rfi_filter_raw(raw, nchan)``
    fref : reference frequency for the dispersion delay
    save_xcorr : whether to write the cross-correlation to an HDF5 file
        named after the two telescopes, t0 and t1
    do_foldspec, do_waterfall : whether to fill the folded spectrum and
        the waterfall
    phasepol : pair of phase polynomials, one per stream; each is called
        with a time in seconds and must provide ``deriv(1)``.  Required
        despite the ``None`` default, since it is indexed unconditionally.
    t0 : start time (isot) x-corr
       [None] start at common beginning of (fh1, fh2)
    t1 : end time of (isot) x-corr
       [None] end at common ending of (fh1, fh2)
       NOTE(review): ``None`` is passed straight to ``Time``; the "common
       ending" default is not implemented in this function -- confirm
       callers always supply t1.
    comm : MPI communicator or None
    verbose : verbosity level for progress output

    Returns
    -------
    (foldspec, icount, waterfall), or None when ``comm.rank == 1`` (see the
    review note in the set-up loop).
    """
    fhs = [fh1, fh2]
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # find nearest starttime landing on same sample
    if t0 is None:
        t0 = max(fh1.time0, fh2.time0)
        print("Starting at %s" % t0)
    t0 = Time(t0, scale='utc')
    t1 = Time(t1, scale='utc')

    # find time offset between the two fh's, accomodating the relative phase
    # delay of the pulsar (the propagation delay)
    phases = [phasepol[i]((t0 - fhs[i].time0).sec) for i in [0, 1]]
    F0 = np.mean([phasepol[i].deriv(1)((t0 - fhs[i].time0).sec)
                  for i in [0, 1]])
    # propagation delay offset from fh1
    dts = [0. * u.s, np.diff(phases)[0] / F0 * u.s]
    if rank == 0:
        print("Will read fh2 ({0}) {1} ahead of fh1 ({2}) "
              "for propagation delay".format(fh2.telescope,
                                             dts[1].to(u.millisecond),
                                             fh1.telescope))

    # prep the fhs for xcorr stream, setting up channelization, dedispersion...
    for i, fh in enumerate(fhs):
        fh.seek(t0)
        # byte offset for propagation delay
        fh.prop_delay = int(round(dts[i] / fh.dtsample)) * fh.recordsize
        fh.dt1 = (1. / fh.samplerate).to(u.s)
        fh.this_nskip = fh.nskip(t0)
        # NOTE(review): rank 1 bails out here, after partially setting up
        # fh1 only.  Kept as found; confirm whether this is intentional.
        if rank == 1:
            return None

        # set up FFT functions: real vs complex fft's
        if fh.nchan > 1:
            fh.thisfft = fft
            fh.thisifft = ifft
            fh.thisfftfreq = fftfreq
        else:
            fh.thisfft = rfft
            fh.thisifft = irfft
            fh.thisfftfreq = rfftfreq

        # pre-calculate time delay due to dispersion in coarse channels
        # LOFAR data is already channelized
        if fh.nchan > 1:
            fh.freq = fh.frequencies
        else:
            if fh.fedge_at_top:
                fh.freq = fh.fedge\
                    - fh.thisfftfreq(nchan * 2, fh.dt1.value) * u.Hz
            else:
                fh.freq = fh.fedge\
                    + fh.thisfftfreq(nchan * 2, fh.dt1.value) * u.Hz
            # [::2] sets frequency channels to numerical recipes ordering
            # or, rfft has an unusual ordering
            fh.freq = fh.freq[::2]

        # sort channels from low --> high frequency
        if np.diff(fh.freq.value).mean() < 0.:
            if rank == 0 and verbose > 1:
                print("Will frequency-sort {0} data before x-corr"
                      .format(fh.telescope))
            fh.freqsort = True
        else:
            fh.freqsort = False

        fh.dt = (dispersion_delay_constant * dm *
                 (1. / fh.freq**2 - 1. / fref**2)).to(u.s).value

        # number of time bins to np.roll the channels for incoherent dedisperse
        if dedisperse == 'incoherent':
            fh.ndt = (fh.dt / fh.dtsample.to(u.s).value)
            # plain ``int`` is what the removed ``np.int`` alias meant
            fh.ndt = -1 * np.rint(fh.ndt).astype(int)

        elif dedisperse in ['coherent', 'by-channel']:
            # pre-calculate required turns due to dispersion
            if fh.nchan > 1:
                fcoh = (fh.freq[np.newaxis, :] +
                        fftfreq(fh.ntint(nchan),
                                fh.dtsample.value)[:, np.newaxis] * u.Hz)
            else:
                if fh.fedge_at_top:
                    fcoh = fh.fedge - fh.thisfftfreq(
                        nchan * 2 * fh.ntint(nchan), fh.dt1.value) * u.Hz
                else:
                    fcoh = fh.fedge + fh.thisfftfreq(
                        nchan * 2 * fh.ntint(nchan), fh.dt1.value) * u.Hz

            # set frequency relative to which dispersion is coherently
            # corrected
            if dedisperse == 'coherent':
                _fref = fref
            else:
                _fref = (np.repeat(fh.freq.value, fh.ntint(nchan))
                         * fh.freq.unit)
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1. / _fref - 1. / fcoh)**2) * 360. * u.deg

            if fh.thisfftfreq is rfftfreq:
                # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
                # for 0 and n need only real part, but for 1...n-1 need
                # real, imag so just get shifts for r[1], r[2], ..., r[n-1]
                dang = dang.to(u.rad).value[1:-1:2]
            else:
                dang = dang.to(u.rad).value

            fh.dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

    # ---- done fh setup ----

    # xcorr setup
    # data-reading params (to help read in same-size time chunks and average
    # onto the same time-and-frequency grids)
    (Rf, Tf, NUf, fkeep, freqs, rows) = data_averaging_coeffs(fh1, fh2)
    nrows = int(min(rows[0] * Tf[1] / Tf[0], rows[1] * Tf[0] / Tf[1]))

    # summarize the (re)sampling
    if rank == 0:
        tmp = (fh1.dtsample.to(u.s).value * fh1.blocksize
               / fh1.recordsize * Rf[0])
        print("\nReading {0} blocks of fh1, {1} blocks of fh2, "
              "for equal timeblocks of {2} sec ".format(Rf[0], Rf[1], tmp))

    if rank == 0 and verbose > 1:
        tmp = np.diff(freqs).mean()
        print("Averaging over {0} channels in fh1, {1} in fh2, for equal "
              "frequency bins of {2} MHz".format(NUf[0], NUf[1], tmp))
        tmp = fh1.dtsample.to(u.s).value * Tf[0]
        print("Averaging over {0} timesteps in fh1, {1} in fh2, for equal "
              "samples of {2} s\n".format(Tf[0], Tf[1], tmp))

    # check if we are averaging both fh's
    if rank == 0 and np.all(np.array(Tf) != 1):
        txt = "Note, averaging both fh's in time to have similar sample "\
              "size. You may want to implement interpolation, or think "\
              "more about this situation"
        print(txt)
    if rank == 0 and np.all(np.array(NUf) != 1):
        txt = "Note, averaging both fh's in freq to have similar sample "\
              "size. You may want to implement interpolation, or think "\
              "more about this situation"
        print(txt)

    # initialize the folded spectrum and waterfall
    # NOTE(review): len(a / n) equals len(a), so this is the number of kept
    # channels *before* frequency averaging; it only matches
    # xpower.shape[1] when NUf is 1.  Left as found because it fixes the
    # shape of the returned arrays -- confirm the intended channel count.
    nchans = min([len(fh.freq[fkeep[i]] / NUf[i])
                  for i, fh in enumerate(fhs)])
    foldspec = np.zeros((nchans, ngate, ntbin))
    icount = np.zeros((nchans, ngate, ntbin), dtype=np.int64)
    nwsize = min(nt * fh1.ntint(nchan) // ntw, nt * fh2.ntint(nchan) // ntw)
    waterfall = np.zeros((nchans, nwsize))

    if save_xcorr:
        # output dataset
        outname = "{0}{1}_{2}_{3}.hdf5".format(
            fh1.telescope[0], fh2.telescope[0], t0, t1)
        # mpi doesn't like colons
        outname = outname.replace(':', '')
        fcorr = h5py.File(outname, 'w')  # , driver='mpio', comm=comm)

        # create the x-corr output file
        # save the frequency grids to help with future TODO: interpolate onto
        # same frequency grid. For now the frequencies fall within same bin
        if rank == 0 and verbose:
            print("Saving x-corr to %s\n" % outname)
        fcorr.create_dataset('freqs',
                             data=np.hstack([f.to(u.MHz).value
                                             for f in freqs]))

        # the x-corr data [tsteps, channels]
        dset = fcorr.create_dataset('corr', (nrows, freqs[0].size),
                                    dtype='complex64',
                                    maxshape=(None, nchan))
        dset.attrs.create('dedisperse', data=str(dedisperse))
        dset.attrs.create('tsample',
                          data=[fhs[i].dtsample.to(u.s).value * Tf[i]
                                for i in [0, 1]])
        dset.attrs.create('chanbw', data=np.diff(freqs).mean())

    # start reading the data
    # this_nskip moves to 't0', rank is for MPI
    idx = rank
    raws = [fh.seek_record_read((fh.this_nskip + idx * Rf[i])
                                * fh.blocksize - fh.prop_delay,
                                fh.blocksize * Rf[i])
            for i, fh in enumerate(fhs)]
    endread = False
    print("read step (idx), fh1.time(), fh2.time() ")
    print("\t inclues {0} propagation delay".format(dts[1]))
    while np.all([raw.size > 0 for raw in raws]):
        if verbose:
            print("idx", idx, fh1.time(), fh2.time())

        # ``vals`` aliases ``raws``: both names refer to the same list
        vals = raws
        chans = [None, None]
        tsamples = [None, None]
        isrs = [None, None]
        # prep the data (channelize, dedisperse, ...)
        for i, fh in enumerate(fhs):
            if rfi_filter_raw is not None:
                raws[i], ok = rfi_filter_raw(raws[i], nchan)

            if fh.telescope == 'aro':
                vals[i] = raws[i].astype(np.float32)
            else:
                vals[i] = raws[i]

            if dedisperse in ['coherent', 'by-channel']:
                fine = fh.thisfft(vals[i], axis=0, overwrite_x=True,
                                  **_fftargs)
                if fh.thisfft is rfft:
                    fine_cmplx = fine[1:-1].view(np.complex64)
                    # overwrites parts of fine, as intended
                    fine_cmplx *= fh.dd_coh
                else:
                    # use this stream's own phase factors; a bare
                    # ``dd_coh`` is not defined in this function
                    fine *= fh.dd_coh
                vals[i] = fh.thisifft(fine, axis=0, overwrite_x=True,
                                      **_fftargs)

            if fh.nchan == 1:
                # ARO data should fall here
                chans[i] = fh.thisfft(vals[i].reshape(-1, nchan * 2),
                                      axis=-1, overwrite_x=True, **_fftargs)
            else:  # lofar and gmrt-phased are already channelised
                chans[i] = vals[i]

            # dedisperse on original (raw) time/freq grid
            # TODO: profile for speedup
            if dedisperse == 'incoherent':
                for ci, v in enumerate(fh.ndt):
                    chans[i][..., ci] = np.roll(chans[i][..., ci], v, axis=0)

            # average onto same time grid; floor division keeps the reshape
            # dimension an integer under true division as well
            chans[i] = chans[i].reshape(Tf[i], chans[i].shape[0] // Tf[i],
                                        -1).mean(axis=0)

            # average onto same freq grid
            chans[i] = chans[i][..., fkeep[i]]
            chans[i] = chans[i].reshape(-1, chans[i].shape[1] // NUf[i],
                                        NUf[i]).mean(axis=-1)

            # current sample positions in stream
            # (each averaged onto same time grid)
            isrs[i] = idx * rows[i] + np.arange(rows[i])
            tsamples[i] = (fh.this_nskip * fh.dtsample * fh.ntint(nchan)
                           + isrs[i] * fh.dtsample)
            tsamples[i] = tsamples[i].reshape(-1, Tf[i]).mean(axis=-1)

            # finally sort the channels low --> high (if necessary)
            # before x-correlating
            if fh.freqsort:
                # TODO: need to think about ordering
                chans[i] = chans[i][..., ::-1]

        # x-correlate
        xpower = chans[0] * chans[1].conjugate()

        if do_waterfall:
            # loop over corresponding positions in waterfall
            isr = idx * nrows + np.arange(nrows)
            for iw in range(isr[0] // ntw, isr[-1] // ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[0:xpower.shape[1], iw] += \
                        np.abs(np.sum(xpower[isr // ntw == iw], axis=0))

        if do_foldspec:
            # time since start (average of the two streams)
            # TODO: think about this: take one stream, both, average, ...
            tsample = np.array(tsamples)

            # timeseries already dedispersed; only the first stream's
            # times and polynomial are used for the phase
            phase = phasepol[0](tsample[0])
            iphase = np.remainder(phase * ngate, ngate).astype(int)

            # bin in the time series: 0..ntbin-1
            ibin = idx * ntbin // nt
            for k in range(nchans):  # equally xpower.shape[1]
                foldspec[k, :, ibin] += np.bincount(iphase,
                                                    np.abs(xpower[:, k]),
                                                    ngate)
                icount[k, :, ibin] += np.bincount(iphase,
                                                  np.abs(xpower[:, k]) != 0.,
                                                  ngate)

        if save_xcorr:
            curshape = dset.shape
            nx = max(nrows * (idx + 1), curshape[0])
            dset.resize((nx + nrows, curshape[1]))
            # TODO: h5py mpio stalls here... turn off save_xcorr for mpirun
            dset[nrows * idx:nrows * (idx + 1)] = xpower

        # read in next dataset if we haven't hit t1 yet
        for fh in [fh1, fh2]:
            if (fh.time() - t1).sec > 0.:
                endread = True

        if endread:
            break
        else:
            idx += size
            raws = [fh.seek_record_read((fh.this_nskip + idx * Rf[i])
                                        * fh.blocksize - fh.prop_delay,
                                        fh.blocksize * Rf[i])
                    for i, fh in enumerate(fhs)]

    if save_xcorr:
        fcorr.close()
    return foldspec, icount, waterfall
def correlate(fh1, fh2, dm, nchan, ngate, ntbin, nt, ntw,
              dedisperse='incoherent', rfi_filter_raw=None, fref=_fref,
              save_xcorr=True, do_foldspec=True, phasepol=None,
              do_waterfall=True, t0=None, t1=None, comm=None, verbose=2):
    """
    Cross-correlate two data streams, optionally folding the result and
    building a waterfall.

    Both file handles are modified in place: per-stream settings
    (``prop_delay``, ``dt1``, ``this_nskip``, ``thisfft``, ``thisifft``,
    ``thisfftfreq``, ``freq``, ``freqsort``, ``dt`` and, depending on
    ``dedisperse``, ``ndt`` or ``dd_coh``) are stored as attributes on them.

    Parameters
    ----------
    fh1 : file handle of first data stream
    fh2 : file handle of second data stream
    dm : dispersion measure used for the dispersion delays
    nchan : number of channels to form for unchannelized (fh.nchan == 1)
        data; also the maximum channel count of the saved dataset
    ngate, ntbin : number of phase and time bins of the folded spectrum
    nt : number of read steps, used to size the waterfall and to map a
        read step onto a time bin
    ntw : number of time samples to combine per waterfall column
    dedisperse : 'incoherent', 'coherent' or 'by-channel'; any other value
        skips dedispersion
    rfi_filter_raw : callable or None
        if given, called as ``raw, ok = rfi_filter_raw(raw, nchan)``
    fref : reference frequency for the dispersion delay
    save_xcorr : whether to write the cross-correlation to an HDF5 file
        named after the two telescopes, t0 and t1
    do_foldspec, do_waterfall : whether to fill the folded spectrum and
        the waterfall
    phasepol : pair of phase polynomials, one per stream; each is called
        with a time in seconds and must provide ``deriv(1)``.  Required
        despite the ``None`` default, since it is indexed unconditionally.
    t0 : start time (isot) x-corr
       [None] start at common beginning of (fh1, fh2)
    t1 : end time of (isot) x-corr
       [None] end at common ending of (fh1, fh2)
       NOTE(review): ``None`` is passed straight to ``Time``; the "common
       ending" default is not implemented in this function -- confirm
       callers always supply t1.
    comm : MPI communicator or None
    verbose : verbosity level for progress output

    Returns
    -------
    (foldspec, icount, waterfall), or None when ``comm.rank == 1`` (see the
    review note in the set-up loop).
    """
    fhs = [fh1, fh2]
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # find nearest starttime landing on same sample
    if t0 is None:
        t0 = max(fh1.time0, fh2.time0)
        print("Starting at %s" % t0)
    t0 = Time(t0, scale='utc')
    t1 = Time(t1, scale='utc')

    # find time offset between the two fh's, accomodating the relative phase
    # delay of the pulsar (the propagation delay)
    phases = [phasepol[i]((t0 - fhs[i].time0).sec) for i in [0, 1]]
    F0 = np.mean(
        [phasepol[i].deriv(1)((t0 - fhs[i].time0).sec) for i in [0, 1]])
    # propagation delay offset from fh1
    dts = [0. * u.s, np.diff(phases)[0] / F0 * u.s]
    if rank == 0:
        print("Will read fh2 ({0}) {1} ahead of fh1 ({2}) "
              "for propagation delay".format(fh2.telescope,
                                             dts[1].to(u.millisecond),
                                             fh1.telescope))

    # prep the fhs for xcorr stream, setting up channelization, dedispersion...
    for i, fh in enumerate(fhs):
        fh.seek(t0)
        # byte offset for propagation delay
        fh.prop_delay = int(round(dts[i] / fh.dtsample)) * fh.recordsize
        fh.dt1 = (1. / fh.samplerate).to(u.s)
        fh.this_nskip = fh.nskip(t0)
        # NOTE(review): rank 1 bails out here, after partially setting up
        # fh1 only.  Kept as found; confirm whether this is intentional.
        if rank == 1:
            return None

        # set up FFT functions: real vs complex fft's
        if fh.nchan > 1:
            fh.thisfft = fft
            fh.thisifft = ifft
            fh.thisfftfreq = fftfreq
        else:
            fh.thisfft = rfft
            fh.thisifft = irfft
            fh.thisfftfreq = rfftfreq

        # pre-calculate time delay due to dispersion in coarse channels
        # LOFAR data is already channelized
        if fh.nchan > 1:
            fh.freq = fh.frequencies
        else:
            if fh.fedge_at_top:
                fh.freq = fh.fedge\
                    - fh.thisfftfreq(nchan * 2, fh.dt1.value) * u.Hz
            else:
                fh.freq = fh.fedge\
                    + fh.thisfftfreq(nchan * 2, fh.dt1.value) * u.Hz
            # [::2] sets frequency channels to numerical recipes ordering
            # or, rfft has an unusual ordering
            fh.freq = fh.freq[::2]

        # sort channels from low --> high frequency
        if np.diff(fh.freq.value).mean() < 0.:
            if rank == 0 and verbose > 1:
                print("Will frequency-sort {0} data before x-corr".format(
                    fh.telescope))
            fh.freqsort = True
        else:
            fh.freqsort = False

        fh.dt = (dispersion_delay_constant * dm *
                 (1. / fh.freq**2 - 1. / fref**2)).to(u.s).value

        # number of time bins to np.roll the channels for incoherent dedisperse
        if dedisperse == 'incoherent':
            fh.ndt = (fh.dt / fh.dtsample.to(u.s).value)
            # plain ``int`` is what the removed ``np.int`` alias meant
            fh.ndt = -1 * np.rint(fh.ndt).astype(int)

        elif dedisperse in ['coherent', 'by-channel']:
            # pre-calculate required turns due to dispersion
            if fh.nchan > 1:
                fcoh = (fh.freq[np.newaxis, :] + fftfreq(
                    fh.ntint(nchan), fh.dtsample.value)[:, np.newaxis] * u.Hz)
            else:
                if fh.fedge_at_top:
                    fcoh = fh.fedge - fh.thisfftfreq(
                        nchan * 2 * fh.ntint(nchan), fh.dt1.value) * u.Hz
                else:
                    fcoh = fh.fedge + fh.thisfftfreq(
                        nchan * 2 * fh.ntint(nchan), fh.dt1.value) * u.Hz

            # set frequency relative to which dispersion is coherently
            # corrected
            if dedisperse == 'coherent':
                _fref = fref
            else:
                _fref = np.repeat(fh.freq.value,
                                  fh.ntint(nchan)) * fh.freq.unit
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1. / _fref - 1. / fcoh)**2) * 360. * u.deg

            if fh.thisfftfreq is rfftfreq:
                # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
                # for 0 and n need only real part, but for 1...n-1 need
                # real, imag so just get shifts for r[1], r[2], ..., r[n-1]
                dang = dang.to(u.rad).value[1:-1:2]
            else:
                dang = dang.to(u.rad).value

            fh.dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

    # ---- done fh setup ----

    # xcorr setup
    # data-reading params (to help read in same-size time chunks and average
    # onto the same time-and-frequency grids)
    (Rf, Tf, NUf, fkeep, freqs, rows) = data_averaging_coeffs(fh1, fh2)
    nrows = int(min(rows[0] * Tf[1] / Tf[0], rows[1] * Tf[0] / Tf[1]))

    # summarize the (re)sampling
    if rank == 0:
        tmp = fh1.dtsample.to(
            u.s).value * fh1.blocksize / fh1.recordsize * Rf[0]
        print("\nReading {0} blocks of fh1, {1} blocks of fh2, "
              "for equal timeblocks of {2} sec ".format(Rf[0], Rf[1], tmp))

    if rank == 0 and verbose > 1:
        tmp = np.diff(freqs).mean()
        print("Averaging over {0} channels in fh1, {1} in fh2, for equal "
              "frequency bins of {2} MHz".format(NUf[0], NUf[1], tmp))
        tmp = fh1.dtsample.to(u.s).value * Tf[0]
        print("Averaging over {0} timesteps in fh1, {1} in fh2, for equal "
              "samples of {2} s\n".format(Tf[0], Tf[1], tmp))

    # check if we are averaging both fh's
    if rank == 0 and np.all(np.array(Tf) != 1):
        txt = "Note, averaging both fh's in time to have similar sample "\
              "size. You may want to implement interpolation, or think "\
              "more about this situation"
        print(txt)
    if rank == 0 and np.all(np.array(NUf) != 1):
        txt = "Note, averaging both fh's in freq to have similar sample "\
              "size. You may want to implement interpolation, or think "\
              "more about this situation"
        print(txt)

    # initialize the folded spectrum and waterfall
    # NOTE(review): len(a / n) equals len(a), so this is the number of kept
    # channels *before* frequency averaging; it only matches
    # xpower.shape[1] when NUf is 1.  Left as found because it fixes the
    # shape of the returned arrays -- confirm the intended channel count.
    nchans = min([len(fh.freq[fkeep[i]] / NUf[i])
                  for i, fh in enumerate(fhs)])
    foldspec = np.zeros((nchans, ngate, ntbin))
    icount = np.zeros((nchans, ngate, ntbin), dtype=np.int64)
    nwsize = min(nt * fh1.ntint(nchan) // ntw, nt * fh2.ntint(nchan) // ntw)
    waterfall = np.zeros((nchans, nwsize))

    if save_xcorr:
        # output dataset
        outname = "{0}{1}_{2}_{3}.hdf5".format(fh1.telescope[0],
                                               fh2.telescope[0], t0, t1)
        # mpi doesn't like colons
        outname = outname.replace(':', '')
        fcorr = h5py.File(outname, 'w')  # , driver='mpio', comm=comm)

        # create the x-corr output file
        # save the frequency grids to help with future TODO: interpolate onto
        # same frequency grid. For now the frequencies fall within same bin
        if rank == 0 and verbose:
            print("Saving x-corr to %s\n" % outname)
        fcorr.create_dataset('freqs',
                             data=np.hstack([f.to(u.MHz).value
                                             for f in freqs]))

        # the x-corr data [tsteps, channels]
        dset = fcorr.create_dataset('corr', (nrows, freqs[0].size),
                                    dtype='complex64',
                                    maxshape=(None, nchan))
        dset.attrs.create('dedisperse', data=str(dedisperse))
        dset.attrs.create(
            'tsample',
            data=[fhs[i].dtsample.to(u.s).value * Tf[i] for i in [0, 1]])
        dset.attrs.create('chanbw', data=np.diff(freqs).mean())

    # start reading the data
    # this_nskip moves to 't0', rank is for MPI
    idx = rank
    raws = [
        fh.seek_record_read(
            (fh.this_nskip + idx * Rf[i]) * fh.blocksize - fh.prop_delay,
            fh.blocksize * Rf[i]) for i, fh in enumerate(fhs)
    ]
    endread = False
    print("read step (idx), fh1.time(), fh2.time() ")
    print("\t inclues {0} propagation delay".format(dts[1]))
    while np.all([raw.size > 0 for raw in raws]):
        if verbose:
            print("idx", idx, fh1.time(), fh2.time())

        # ``vals`` aliases ``raws``: both names refer to the same list
        vals = raws
        chans = [None, None]
        tsamples = [None, None]
        isrs = [None, None]
        # prep the data (channelize, dedisperse, ...)
        for i, fh in enumerate(fhs):
            if rfi_filter_raw is not None:
                raws[i], ok = rfi_filter_raw(raws[i], nchan)

            if fh.telescope == 'aro':
                vals[i] = raws[i].astype(np.float32)
            else:
                vals[i] = raws[i]

            if dedisperse in ['coherent', 'by-channel']:
                fine = fh.thisfft(vals[i], axis=0, overwrite_x=True,
                                  **_fftargs)
                if fh.thisfft is rfft:
                    fine_cmplx = fine[1:-1].view(np.complex64)
                    # overwrites parts of fine, as intended
                    fine_cmplx *= fh.dd_coh
                else:
                    # use this stream's own phase factors; a bare
                    # ``dd_coh`` is not defined in this function
                    fine *= fh.dd_coh
                vals[i] = fh.thisifft(fine, axis=0, overwrite_x=True,
                                      **_fftargs)

            if fh.nchan == 1:
                # ARO data should fall here
                chans[i] = fh.thisfft(vals[i].reshape(-1, nchan * 2),
                                      axis=-1, overwrite_x=True, **_fftargs)
            else:  # lofar and gmrt-phased are already channelised
                chans[i] = vals[i]

            # dedisperse on original (raw) time/freq grid
            # TODO: profile for speedup
            if dedisperse == 'incoherent':
                for ci, v in enumerate(fh.ndt):
                    chans[i][..., ci] = np.roll(chans[i][..., ci], v, axis=0)

            # average onto same time grid; floor division keeps the reshape
            # dimension an integer under true division as well
            chans[i] = chans[i].reshape(Tf[i], chans[i].shape[0] // Tf[i],
                                        -1).mean(axis=0)

            # average onto same freq grid
            chans[i] = chans[i][..., fkeep[i]]
            chans[i] = chans[i].reshape(-1, chans[i].shape[1] // NUf[i],
                                        NUf[i]).mean(axis=-1)

            # current sample positions in stream
            # (each averaged onto same time grid)
            isrs[i] = idx * rows[i] + np.arange(rows[i])
            tsamples[i] = (fh.this_nskip * fh.dtsample * fh.ntint(nchan) +
                           isrs[i] * fh.dtsample)
            tsamples[i] = tsamples[i].reshape(-1, Tf[i]).mean(axis=-1)

            # finally sort the channels low --> high (if necessary)
            # before x-correlating
            if fh.freqsort:
                # TODO: need to think about ordering
                chans[i] = chans[i][..., ::-1]

        # x-correlate
        xpower = chans[0] * chans[1].conjugate()

        if do_waterfall:
            # loop over corresponding positions in waterfall
            isr = idx * nrows + np.arange(nrows)
            for iw in range(isr[0] // ntw, isr[-1] // ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[0:xpower.shape[1], iw] += \
                        np.abs(np.sum(xpower[isr // ntw == iw], axis=0))

        if do_foldspec:
            # time since start (average of the two streams)
            # TODO: think about this: take one stream, both, average, ...
            tsample = np.array(tsamples)

            # timeseries already dedispersed; only the first stream's
            # times and polynomial are used for the phase
            phase = phasepol[0](tsample[0])
            iphase = np.remainder(phase * ngate, ngate).astype(int)

            # bin in the time series: 0..ntbin-1
            ibin = idx * ntbin // nt
            for k in range(nchans):  # equally xpower.shape[1]
                foldspec[k, :, ibin] += np.bincount(iphase,
                                                    np.abs(xpower[:, k]),
                                                    ngate)
                icount[k, :, ibin] += np.bincount(iphase,
                                                  np.abs(xpower[:, k]) != 0.,
                                                  ngate)

        if save_xcorr:
            curshape = dset.shape
            nx = max(nrows * (idx + 1), curshape[0])
            dset.resize((nx + nrows, curshape[1]))
            # TODO: h5py mpio stalls here... turn off save_xcorr for mpirun
            dset[nrows * idx:nrows * (idx + 1)] = xpower

        # read in next dataset if we haven't hit t1 yet
        for fh in [fh1, fh2]:
            if (fh.time() - t1).sec > 0.:
                endread = True

        if endread:
            break
        else:
            idx += size
            raws = [
                fh.seek_record_read(
                    (fh.this_nskip + idx * Rf[i]) * fh.blocksize -
                    fh.prop_delay, fh.blocksize * Rf[i])
                for i, fh in enumerate(fhs)
            ]

    if save_xcorr:
        fcorr.close()
    return foldspec, icount, waterfall
def fold(fh, comm, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, rfi_filter_raw=None, rfi_filter_power=None,
         return_fits=True):
    """
    FFT data, fold by phase/time and make a waterfall series

    Folding is done from the position the file is currently in

    Parameters
    ----------
    fh : file handle
        handle to file holding voltage timeseries.  The body reads
        ``fh.blocksize``, ``fh.samplerate``, ``fh.telescope``, ``fh.nchan``
        and calls ``fh.tell()`` and ``fh.seek_record_read()``.
    comm: MPI communicator or None
        if given, its ``rank`` and ``size`` attributes are used to
        distribute the ``nt`` sets round-robin over the processes.
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units).
        NOTE(review): the body takes the sample time from ``fh.samplerate``;
        this argument itself is not used.
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of part of the file that is read (i.e., ignoring nhead)
    dedisperse : None or string (default: incoherent).
        None, 'incoherent', 'coherent', 'by-channel'.
        Note: None really does nothing
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or int
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    rfi_filter_raw : callable or None
        if given, called as ``rfi_filter_raw(raw, nchan)`` and expected to
        return ``(raw, ok)``.
    rfi_filter_power : callable or None
        if given, called as ``rfi_filter_power(power)`` and expected to
        return the filtered power.
    return_fits : bool (default: True)
        build a subint fits table for rank == 0; otherwise (or on other
        ranks) an empty ``FITS.HDUList`` is returned in its place.

    Returns
    -------
    foldspec : ndarray, shape (nchan, ngate, ntbin)
        summed power per channel, phase gate and time bin.
    icount : ndarray of int64, shape (nchan, ngate, ntbin)
        number of non-zero power samples that went into each foldspec cell.
    waterfall : ndarray, shape (nchan, nt*ntint//ntw)
        power summed in blocks of ntw samples.
    subinttable : FITS.HDUList
        see ``return_fits``.
    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # initialize folded spectrum and waterfall
    foldspec = np.zeros((nchan, ngate, ntbin))
    icount = np.zeros((nchan, ngate, ntbin), dtype=np.int64)
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    if verbose and rank == 0:
        print('Reading from {}'.format(fh))

    nskip = fh.tell()/fh.blocksize
    if nskip > 0:
        if verbose and rank == 0:
            print('Starting {0} blocks = {1} bytes out from start.'
                  .format(nskip, nskip*fh.blocksize))

    dt1 = (1./fh.samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    if fh.telescope == 'lofar':
        dtsample = fh.dtsample
    else:
        dtsample = nchan * 2 * dt1
    tstart = dtsample * ntint * nskip

    # set up FFT functions: real vs complex fft's
    if fh.nchan > 1:
        thisfft = fft
        thisifft = ifft
        thisfftfreq = fftfreq
    else:
        thisfft = rfft
        thisifft = irfft
        thisfftfreq = rfftfreq

    # pre-calculate time delay due to dispersion in coarse channels
    # LOFAR data is already channelized
    if fh.nchan > 1:
        freq = fh.frequencies
    else:
        if fedge_at_top:
            freq = fedge - thisfftfreq(nchan*2, dt1.value) * u.Hz
        else:
            freq = fedge + thisfftfreq(nchan*2, dt1.value) * u.Hz
        # sort lowest to highest freq
        # freq.sort()
        # [::2] sets frequency channels to numerical recipes ordering
        # or, rfft has an unusual ordering
        freq = freq[::2]

    dt = (dispersion_delay_constant * dm *
          (1./freq**2 - 1./fref**2)).to(u.s).value

    if dedisperse in ['coherent', 'by-channel']:
        # pre-calculate required turns due to dispersion
        if fh.nchan > 1:
            fcoh = (freq[np.newaxis, :] +
                    fftfreq(ntint, dtsample.value)[:, np.newaxis] * u.Hz)
        else:
            if fedge_at_top:
                fcoh = fedge - thisfftfreq(nchan*2*ntint, dt1.value) * u.Hz
            else:
                fcoh = fedge + thisfftfreq(nchan*2*ntint, dt1.value) * u.Hz
        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            # _fref = np.round((fcoh * dtsample).to(1).value) / dtsample
            _fref = np.repeat(freq.value, ntint) * freq.unit
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy)
        dang = (dispersion_delay_constant * dm * fcoh *
                (1./_fref-1./fcoh)**2) * 360. * u.deg

        if thisfftfreq is rfftfreq:
            # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
            # for 0 and n need only real part, but for 1...n-1 need real, imag
            # so just get shifts for r[1], r[2], ..., r[n-1]
            dang = dang.to(u.rad).value[1:-1:2]
        else:
            dang = dang.to(u.rad).value

        dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

    # Keep j defined for the summary print even if this rank gets no sets
    # (e.g. rank >= nt), which would otherwise raise a NameError.
    j = -1
    for j in range(rank, nt, size):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j+1, nt, (tstart+dtsample*j*ntint).value))   # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            # ARO/GMRT return int-stream,
            # LOFAR returns complex64 (count/nchan, nchan)
            # LOFAR "combined" file class can do lots of seeks, we minimize
            # that with the 'seek_record_read' routine
            raw = fh.seek_record_read((nskip+j)*fh.blocksize, fh.blocksize)
        except (EOFError, IOError) as exc:
            print("Hit {0!r}; writing pgm's".format(exc))
            break
        if verbose >= 2:
            print("Read {} items".format(raw.size), end="")

        if rfi_filter_raw is not None:
            raw, ok = rfi_filter_raw(raw, nchan)
            if verbose >= 2:
                print("... raw RFI (zap {0}/{1})"
                      .format(np.count_nonzero(~ok), ok.size), end="")

        if fh.telescope == 'aro':
            vals = raw.astype(np.float32)
        else:
            vals = raw

        # TODO: for coherent dedispersion, need to undo existing channels
        # for lofar and gmrt-phased
        if dedisperse in ['coherent', 'by-channel']:
            fine = thisfft(vals, axis=0, overwrite_x=True, **_fftargs)
            if thisfft is rfft:
                fine_cmplx = fine[1:-1].view(np.complex64)
                fine_cmplx *= dd_coh  # overwrites parts of fine, as intended
            else:
                fine *= dd_coh
            vals = thisifft(fine, axis=0, overwrite_x=True, **_fftargs)
            if verbose >= 2:
                print("... dedispersed", end="")

        if fh.nchan == 1:
            chan2 = thisfft(vals.reshape(-1, nchan*2), axis=-1,
                            overwrite_x=True, **_fftargs)**2
            # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
            # re-order to Num.Rec. format: Re[0], Re[n/2], Re[1], ....
            power = np.hstack((chan2[:, :1]+chan2[:, -1:],
                               chan2[:, 1:-1].reshape(-1, nchan-1, 2).sum(-1)))
        else:  # lofar and gmrt-phased are already channelised
            power = vals.real**2 + vals.imag**2

        if verbose >= 2:
            print("... power", end="")

        if rfi_filter_power is not None:
            power = rfi_filter_power(power)
            # progress fragments are only meaningful in very-verbose mode,
            # where the line is terminated by "... done" below
            if verbose >= 2:
                print("... power RFI", end="")

        # current sample positions in stream
        isr = j*ntint + np.arange(ntint)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in range(isr[0]//ntw, isr[-1]//ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[isr//ntw == iw],
                                               axis=0)
            if verbose >= 2:
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr*dtsample).value  # times since start
            ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                elif dedisperse in ['incoherent', 'by-channel']:
                    t = tsample - dt[k]  # dedispersed times
                elif dedisperse is None:
                    t = tsample  # do nothing
                else:
                    t = tsample - dt[k]

                phase = phasepol(t)  # corresponding PSR phases
                # builtin int: the np.int alias no longer exists in NumPy
                iphase = np.remainder(phase*ngate, ngate).astype(int)
                # sum and count samples by phase bin
                foldspec[k, :, ibin] += np.bincount(iphase, power[:, k],
                                                    ngate)
                # Count only the samples with non-zero power.  Selecting
                # them (instead of using a boolean weight) keeps bincount's
                # result integer, so it can be added in place to the int64
                # icount array.
                icount[k, :, ibin] += np.bincount(
                    iphase[power[:, k] != 0.], minlength=ngate)

            if verbose >= 2:
                print("... folded", end="")

        if verbose >= 2:
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    if return_fits and rank == 0:
        # subintu HDU
        # update table columns
        # TODO: allow multiple polarizations
        npol = 1
        newcols = []
        # FITS table creation difficulties...
        # assign data *after* 'new_table' creation
        array2assign = {}
        tsubint = ntint*dtsample
        for col in fh['subint'].columns:
            attrs = col.copy().__dict__
            # remove non-init args
            for nn in ['_pseudo_unsigned_ints', '_dims', '_physical_values',
                       'dtype', '_phantom', 'array']:
                attrs.pop(nn, None)

            if col.name == 'TSUBINT':
                array2assign[col.name] = np.array(tsubint)
            elif col.name == 'OFFS_SUB':
                array2assign[col.name] = np.arange(ntbin) * tsubint
            elif col.name == 'DAT_FREQ':
                # TODO: sort from lowest freq. to highest
                # ('DATA') needs sorting as well
                array2assign[col.name] = (freq.to(u.MHz).value
                                          .astype(np.double))
                attrs['format'] = '{0}D'.format(freq.size)
            elif col.name == 'DAT_WTS':
                array2assign[col.name] = np.ones(freq.size, dtype=np.float32)
                attrs['format'] = '{0}E'.format(freq.size)
            elif col.name == 'DAT_OFFS':
                array2assign[col.name] = np.zeros(freq.size*npol,
                                                  dtype=np.float32)
                attrs['format'] = '{0}E'.format(freq.size*npol)
            elif col.name == 'DAT_SCL':
                array2assign[col.name] = np.ones(freq.size*npol,
                                                 dtype=np.float32)
                # format must match the array length (as for DAT_OFFS)
                attrs['format'] = '{0}E'.format(freq.size*npol)
            elif col.name == 'DATA':
                array2assign[col.name] = np.zeros(
                    (ntbin, npol, freq.size, ngate), dtype='i1')
                attrs['dim'] = "({},{},{})".format(ngate, freq.size, npol)
                attrs['format'] = "{0}I".format(ngate*freq.size*npol)
            newcols.append(FITS.Column(**attrs))
        newcoldefs = FITS.ColDefs(newcols)

        oheader = fh['SUBINT'].header.copy()
        newtable = FITS.new_table(newcoldefs, nrows=ntbin, header=oheader)
        # update the 'subint' header and create a new one to be returned
        # owing to the structure of the code (MPI), we need to assign
        # the 'DATA' outside of fold.py
        newtable.header.update('NPOL', 1)
        newtable.header.update('NBIN', ngate)
        newtable.header.update('NBIN_PRD', ngate)
        newtable.header.update('NCHAN', freq.size)
        newtable.header.update('INT_UNIT', 'PHS')
        newtable.header.update('TBIN', tsubint.to(u.s).value)
        chan_bw = np.abs(np.diff(freq.to(u.MHz).value).mean())
        newtable.header.update('CHAN_BW', chan_bw)
        if dedisperse in ['coherent', 'by-channel', 'incoherent']:
            newtable.header.update('DM', dm.value)
        # finally assign the table data
        for name, array in array2assign.items():
            try:
                newtable.data.field(name)[:] = array
            except ValueError:
                print("FITS error... work in progress",
                      name, array.shape, newtable.data.field(name)[:].shape)

        phdu = fh['PRIMARY'].copy()
        subinttable = FITS.HDUList([phdu, newtable])
        subinttable[1].header.update('EXTNAME', 'SUBINT')
        subinttable['PRIMARY'].header.update('DATE-OBS', fh.time0.isot)
        subinttable['PRIMARY'].header.update('STT_IMJD', int(fh.time0.mjd))
        # Whole seconds past 00h: fractional day times 86400.  (Rounding to
        # 10 us first guards against float representation error just below
        # an integer second.)
        day_fraction = fh.time0.mjd - int(fh.time0.mjd)
        subinttable['PRIMARY'].header.update(
            'STT_SMJD', int(round(day_fraction * 86400., 5)))
    else:
        subinttable = FITS.HDUList([])

    return foldspec, icount, waterfall, subinttable
def fold(fh, comm, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, rfi_filter_raw=None, rfi_filter_power=None,
         return_fits=False):
    """
    FFT data, fold by phase/time and make a waterfall series

    Folding is done from the position the file is currently in

    Parameters
    ----------
    fh : file handle
        handle to file holding voltage timeseries.  The body reads
        ``fh.nchan``, ``fh.blocksize``, ``fh.telescope`` and, when present,
        ``fh.npol`` and ``fh.data_is_complex``.
    comm: MPI communicator or None
        will use size, rank attributes
    samplerate : Quantity
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT; must be a multiple of
        ``fh.nchan``.
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of the file that is read.
    dedisperse : None or string (default: incoherent).
        None, 'incoherent', 'coherent', 'by-channel'.
        Note: None really does nothing
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or int
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    rfi_filter_raw : callable or None
        if given, called as ``rfi_filter_raw(raw)`` and expected to return
        ``(raw, ok)``.
    rfi_filter_power : callable or None
        if given, called as ``rfi_filter_power(power, times)`` and expected
        to return the filtered power.
    return_fits : bool (default: False)
        accepted for interface compatibility; not used by this function.

    Returns
    -------
    foldspec : ndarray of float32 or None
        shape (ntbin, nchan, ngate, npol**2); the last axis is dropped for a
        single polarisation.  None if ``do_foldspec`` is False.
    icount : ndarray of int32 or None
        shape (ntbin, nchan, ngate); number of non-zero samples per cell.
        None if ``do_foldspec`` is False.
    waterfall : ndarray of float64 or None
        shape (nwsize, nchan, npol**2); the last axis is dropped for a
        single polarisation.  None if ``do_waterfall`` is False.
    """
    assert dedisperse in (None, 'incoherent', 'by-channel', 'coherent')
    need_fine_channels = dedisperse in ['by-channel', 'coherent']
    assert nchan % fh.nchan == 0
    if dedisperse == 'by-channel' and fh.nchan > 1:
        oversample = nchan // fh.nchan
        assert ntint % oversample == 0
    else:
        oversample = 1

    if dedisperse == 'coherent' and fh.nchan > 1:
        warnings.warn("Doing coherent dedispersion on channelized data. "
                      "May get artefacts!")

    if comm is None:
        mpi_rank = 0
        mpi_size = 1
    else:
        mpi_rank = comm.rank
        mpi_size = comm.size

    npol = getattr(fh, 'npol', 1)
    assert npol == 1 or npol == 2
    if verbose > 1 and mpi_rank == 0:
        print("Number of polarisations={}".format(npol))

    # initialize folded spectrum and waterfall
    # TODO: use estimated number of points to set dtype
    if do_foldspec:
        foldspec = np.zeros((ntbin, nchan, ngate, npol**2), dtype=np.float32)
        icount = np.zeros((ntbin, nchan, ngate), dtype=np.int32)
    else:
        foldspec = None
        icount = None

    if do_waterfall:
        nwsize = nt*ntint//ntw//oversample
        waterfall = np.zeros((nwsize, nchan, npol**2), dtype=np.float64)
    else:
        waterfall = None

    if verbose and mpi_rank == 0:
        print('Reading from {}'.format(fh))

    nskip = fh.tell()/fh.blocksize
    if nskip > 0:
        if verbose and mpi_rank == 0:
            print('Starting {0} blocks = {1} bytes out from start.'
                  .format(nskip, nskip*fh.blocksize))

    dt1 = (1./samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    if fh.telescope == 'lofar':
        dtsample = fh.dtsample
    else:
        dtsample = nchan // oversample * 2 * dt1
    tstart = dtsample * ntint * nskip

    # pre-calculate time delay due to dispersion in coarse channels
    # for channelized data, frequencies are known
    tb = -1. if fedge_at_top else +1.
    if fh.nchan == 1:
        if getattr(fh, 'data_is_complex', False):
            # for complex data, really each complex sample consists of
            # 2 real ones, so multiply dt1 by 2.
            freq = fedge + tb * fftfreq(nchan, 2.*dt1.value) * u.Hz
            if dedisperse == 'coherent':
                fcoh = fedge + tb * fftfreq(nchan*ntint,
                                            2.*dt1.value) * u.Hz
                fcoh.shape = (-1, 1)
            elif dedisperse == 'by-channel':
                fcoh = freq + (tb * fftfreq(
                    ntint, 2.*dtsample.value) * u.Hz)[:, np.newaxis]
        else:
            freq = fedge + tb * rfftfreq(nchan*2, dt1.value)[::2] * u.Hz
            if dedisperse == 'coherent':
                fcoh = fedge + tb * rfftfreq(nchan*ntint*2,
                                             dt1.value)[::2] * u.Hz
                fcoh.shape = (-1, 1)
            elif dedisperse == 'by-channel':
                fcoh = freq + tb * fftfreq(
                    ntint, dtsample.value)[:, np.newaxis] * u.Hz
        freq_in = freq

    else:
        # input frequencies may not be the ones going out
        freq_in = fh.frequencies
        if oversample == 1:
            freq = freq_in
        else:
            freq = (freq_in[:, np.newaxis] + tb * u.Hz *
                    rfftfreq(oversample*2, dtsample.value/2.)[::2])
        # same as fine = rfftfreq(2*ntint, dtsample.value/2.)[::2]
        fcoh = freq_in[np.newaxis, :] + tb * u.Hz * rfftfreq(
            ntint*2, dtsample.value/2.)[::2, np.newaxis]

    # print('fedge_at_top={0}, tb={1}'.format(fedge_at_top, tb))
    # indices that put the output channels in order of increasing frequency
    ifreq = freq.ravel().argsort()

    # pre-calculate time offsets in (input) channelized streams
    dt = dispersion_delay_constant * dm * (1./freq_in**2 - 1./fref**2)

    if need_fine_channels:
        # pre-calculate required turns due to dispersion.
        #
        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            _fref = freq_in[np.newaxis, :]
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy
        dang = (dispersion_delay_constant * dm * fcoh *
                (1./_fref-1./fcoh)**2) * u.cycle
        with u.set_enabled_equivalencies(u.dimensionless_angles()):
            dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)
        # add dimension for polarisation
        dd_coh = dd_coh[..., np.newaxis]

    # Calculate the part of the whole file this node should handle.
    size_per_node = (nt-1)//mpi_size + 1
    start_block = mpi_rank*size_per_node
    end_block = min((mpi_rank+1)*size_per_node, nt)
    for j in range(start_block, end_block):
        if verbose and j % progress_interval == 0:
            print('#{:4d}/{:4d} is doing {:6d}/{:6d} [={:6d}/{:6d}]; '
                  'time={:18.12f}'
                  .format(mpi_rank, mpi_size, j+1, nt,
                          j-start_block+1, end_block-start_block,
                          (tstart+dtsample*j*ntint).value))  # time since start

        # Just in case numbers were set wrong -- break if file ends;
        # better keep at least the work done.
        try:
            raw = fh.seek_record_read(int((nskip+j)*fh.blocksize),
                                      fh.blocksize)
        except (EOFError, IOError) as exc:
            print("Hit {0!r}; writing data collected.".format(exc))
            break
        if verbose >= 2:
            print("#{:4d}/{:4d} read {} items"
                  .format(mpi_rank, mpi_size, raw.size), end="")

        if npol == 2:  # multiple polarisations
            # view the structured record as its first field's base dtype
            # (list(...) so the values can be indexed on Python 3 as well)
            raw = raw.view(list(raw.dtype.fields.values())[0][0])

        if fh.nchan == 1:  # raw.shape=(ntint*npol)
            raw = raw.reshape(-1, npol)
        else:  # raw.shape=(ntint, nchan*npol)
            raw = raw.reshape(-1, fh.nchan, npol)

        if rfi_filter_raw is not None:
            raw, ok = rfi_filter_raw(raw)
            if verbose >= 2:
                print("... raw RFI (zap {0}/{1})"
                      .format(np.count_nonzero(~ok), ok.size), end="")

        if np.can_cast(raw.dtype, np.float32):
            vals = raw.astype(np.float32)
        else:
            assert raw.dtype.kind == 'c'
            vals = raw

        if fh.nchan == 1:
            # have real-valued time stream of complex baseband
            # if we need some coherentdedispersion, do FT of whole thing,
            # otherwise to output channels
            if raw.dtype.kind == 'c':
                ftchan = len(vals) if dedisperse == 'coherent' else nchan
                vals = fft(vals.reshape(-1, ftchan, npol), axis=1,
                           overwrite_x=True, **_fftargs)
            else:  # real data
                ftchan = len(vals) // 2 if dedisperse == 'coherent' else nchan
                vals = rfft(vals.reshape(-1, ftchan*2, npol), axis=1,
                            overwrite_x=True, **_fftargs)
                if vals.dtype.kind == 'f':  # this depends on version, sigh.
                    # rfft: Re[0], Re[1], Im[1],.,Re[n/2-1], Im[n/2-1], Re[n/2]
                    # re-order to normal fft format (like Numerical Recipes):
                    # Re[0], Re[n], Re[1], Im[1], .... (channel 0 junk anyway)
                    vals = (np.hstack((vals[:, :1], vals[:, -1:],
                                       vals[:, 1:-1]))
                            .reshape(-1, ftchan, 2 * npol))
                    if npol == 2:  # reorder pol & real/imag
                        # Swap columns 1 and 2.  The temporary must be a
                        # copy: a plain slice is a view, which would be
                        # overwritten by the first assignment and leave both
                        # columns holding the same data.
                        vals1 = vals[:, :, 1].copy()
                        vals[:, :, 1] = vals[:, :, 2]
                        vals[:, :, 2] = vals1
                        vals = vals.reshape(-1, ftchan, npol, 2)
                else:
                    # complex rfft output: fold Re[n] into the imaginary
                    # part of channel 0 and drop the last entry
                    vals[:, 0] = vals[:, 0].real + 1j * vals[:, -1].real
                    vals = vals[:, :-1]

                vals = vals.view(np.complex64).reshape(-1, ftchan, npol)

            # for incoherent, vals.shape=(ntint, nchan, npol)
            # for others, (1, ntint*nchan, npol) -> (ntint*nchan, 1, npol)
            if need_fine_channels:
                if dedisperse == 'by-channel':
                    fine = fft(vals, axis=0, overwrite_x=True, **_fftargs)
                else:
                    fine = vals.reshape(-1, 1, npol)

        else:  # data already channelized
            if need_fine_channels:
                fine = fft(vals, axis=0, overwrite_x=True, **_fftargs)
                # have fine.shape=(ntint, fh.nchan, npol)

        if need_fine_channels:
            # Dedisperse.
            fine *= dd_coh

            # if dedisperse == 'by-channel' and oversample > 1:
            # fine.shape=(ntint*oversample, chan_in, npol)
            #           =(coarse,fine,fh.chan, npol)
            #  -> reshape(oversample, ntint, fh.nchan, npol)
            # want (ntint=fine, fh.nchan, oversample, npol) -> .transpose
            # fine = (fine.reshape(nchan / fh.nchan, -1, fh.nchan, npol)
            #         .transpose(1, 2, 0, 3)
            #         .reshape(-1, nchan, npol))

            # now fine.shape=(ntint, nchan, npol) w/ nchan=1 for coherent
            vals = ifft(fine, axis=0, overwrite_x=True, **_fftargs)

            if dedisperse == 'coherent' and nchan > 1 and fh.nchan == 1:
                # final FT to get requested channels
                vals = vals.reshape(-1, nchan, npol)
                vals = fft(vals, axis=1, overwrite_x=True, **_fftargs)
            elif dedisperse == 'by-channel' and oversample > 1:
                vals = vals.reshape(-1, oversample, fh.nchan, npol)
                vals = fft(vals, axis=1, overwrite_x=True, **_fftargs)
                vals = vals.transpose(0, 2, 1, 3).reshape(-1, nchan, npol)

            # vals[time, chan, pol]
            if verbose >= 2:
                print("... dedispersed", end="")

        if npol == 1:
            power = vals.real**2 + vals.imag**2
        else:
            # the two auto-products plus the real and imaginary parts of
            # the cross-product of the two polarisations
            p0 = vals[..., 0]
            p1 = vals[..., 1]
            power = np.empty(vals.shape[:-1] + (4,), np.float32)
            power[..., 0] = p0.real**2 + p0.imag**2
            power[..., 1] = p0.real*p1.real + p0.imag*p1.imag
            power[..., 2] = p0.imag*p1.real - p0.real*p1.imag
            power[..., 3] = p1.real**2 + p1.imag**2

        if verbose >= 2:
            print("... power", end="")

        # current sample positions and corresponding time in stream
        isr = j*(ntint // oversample) + np.arange(ntint // oversample)
        tsr = (isr*dtsample*oversample)[:, np.newaxis]

        if rfi_filter_power is not None:
            power = rfi_filter_power(power, tsr.squeeze())
            # progress fragments are only meaningful in very-verbose mode,
            # where the line is terminated by "... done" below
            if verbose >= 2:
                print("... power RFI", end="")

        # correct for delay if needed
        if dedisperse in ['incoherent', 'by-channel']:
            # tsample.shape=(ntint/oversample, nchan_in)
            tsr = tsr - dt

        if do_waterfall:
            # # loop over corresponding positions in waterfall
            # for iw in xrange(isr[0]//ntw, isr[-1]//ntw + 1):
            #     if iw < nwsize:  # add sum of corresponding samples
            #         waterfall[iw, :] += np.sum(power[isr//ntw == iw],
            #                                    axis=0)[ifreq]
            iw = np.round((tsr / dtsample / oversample).to(1)
                          .value / ntw).astype(int)
            for k, kfreq in enumerate(ifreq):  # sort in frequency while at it
                iwk = iw[:, (0 if iw.shape[1] == 1 else kfreq // oversample)]
                iwk = np.clip(iwk, 0, nwsize-1, out=iwk)
                iwkmin = iwk.min()
                iwkmax = iwk.max()+1
                for ipow in range(npol**2):
                    waterfall[iwkmin:iwkmax, k, ipow] += np.bincount(
                        iwk-iwkmin, power[:, kfreq, ipow], iwkmax-iwkmin)
            if verbose >= 2:
                print("... waterfall", end="")

        if do_foldspec:
            ibin = (j*ntbin) // nt  # bin in the time series: 0..ntbin-1

            # times and cycles since start time of observation.
            tsample = tstart + tsr
            phase = (phasepol(tsample.to(u.s).value.ravel())
                     .reshape(tsample.shape))  # corresponding PSR phases
            # builtin int: the np.int alias no longer exists in NumPy
            iphase = np.remainder(phase*ngate, ngate).astype(int)

            for k, kfreq in enumerate(ifreq):  # sort in frequency while at it
                iph = iphase[:, (0 if iphase.shape[1] == 1
                                 else kfreq // oversample)]
                # sum and count samples by phase bin
                for ipow in range(npol**2):
                    foldspec[ibin, k, :, ipow] += np.bincount(
                        iph, power[:, kfreq, ipow], ngate)
                # Count only samples with non-zero power.  Selecting them
                # (instead of using a boolean weight) keeps bincount's
                # result integer, so it can be added in place to the int32
                # icount array.
                icount[ibin, k, :] += np.bincount(
                    iph[power[:, kfreq, 0] != 0.], minlength=ngate)

            if verbose >= 2:
                print("... folded", end="")

        if verbose >= 2:
            print("... done")

    # Commented out as workaround, this was causing "Referenced before
    # assignment" errors with JB data
    # if verbose >= 2 or verbose and mpi_rank == 0:
    #     print('#{:4d}/{:4d} read {:6d} out of {:6d}'
    #           .format(mpi_rank, mpi_size, j+1, nt))

    if npol == 1:
        # drop the (length-1) polarisation axis
        if do_foldspec:
            foldspec = foldspec.reshape(foldspec.shape[:-1])
        if do_waterfall:
            waterfall = waterfall.reshape(waterfall.shape[:-1])

    return foldspec, icount, waterfall
def fold(file1, samplerate, fmid, nchan, nt, ntint, nhead, ngate, ntbin, ntw,
         dm, fref, phasepol, coherent=False, do_waterfall=True,
         do_foldspec=True, verbose=True, progress_interval=100):
    """FFT Effelsberg data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    file1 : string
        name of the file holding voltage timeseries; opened with
        ``gzip.open`` if the name contains '.gz'.
    samplerate : Quantity
        rate at which samples were originally taken and thus band width
        (frequency units)
    fmid : float
        mid point of the frequency band (frequency units)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*(2*ntint), with each sample containing
        real,imag for two polarisations
    nhead : int
        number of bytes to skip before reading (usually 4096 for Effelsberg)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        start of part of the file that is read (i.e., ignoring nhead)
    coherent : bool
        if True, dedisperse coherently before channelising; otherwise
        correct the sample times per channel (default: False)
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets

    Returns
    -------
    foldspec2 : ndarray, shape (nchan, ngate, ntbin)
        normalised, phase-average-subtracted folded spectrum, with the
        channel axis shifted so that frequency increases monotonically.
    waterfall : ndarray, shape (nchan, nt*ntint//ntw)
        power summed in blocks of ntw samples, with the mean of the filled
        cells of each channel subtracted, frequency-shifted like foldspec2.
    """
    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (each nchan contains 4 bytes:
    # real,imag for 2 polarisations).
    recsize = 4*nchan*ntint
    if verbose:
        print('Reading from {}'.format(file1))

    # Keep j defined for the summary print even when nt == 0.
    j = -1
    myopen = gzip.open if '.gz' in file1 else open
    # NOTE(review): for gzip.open the third positional argument is
    # ``compresslevel`` rather than a buffer size -- confirm this is harmless
    # for the gzip versions in use.
    with myopen(file1, 'rb', recsize) as fh1:

        if nhead > 0:
            if verbose:
                print('Skipping {0} bytes'.format(nhead))
            fh1.seek(nhead)

        # accumulators for the time bin currently being filled
        foldspec = np.zeros((nchan, ngate))
        icount = np.zeros((nchan, ngate))

        # gosh, fftpack has everything; used to calculate with:
        # fband / nchan * (np.mod(np.arange(nchan)+nchan/2, nchan)-nchan/2)
        if coherent:
            # pre-calculate required turns due to dispersion
            fcoh = (fmid +
                    fftfreq(nchan*ntint, (1./samplerate).to(u.s).value) * u.Hz)
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1./fref-1./fcoh)**2) * 360. * u.deg
            dedisperse = np.exp(dang.to(u.rad).value * 1j
                                ).conj().astype(np.complex64)
        else:
            # pre-calculate time delay due to dispersion
            freq = fmid + fftfreq(nchan, (1./samplerate).to(u.s).value) * u.Hz
            dt = (dispersion_delay_constant * dm *
                  (1./freq**2 - 1./fref**2)).to(u.s).value

        dtsample = (nchan/samplerate).to(u.s).value

        for j in range(nt):
            if verbose and (j+1) % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j+1, nt, dtsample*j*ntint))   # equivalent time since start

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                data = fh1.read(recsize)
            except (EOFError, IOError):
                break
            if len(data) < recsize:
                # end of file or truncated last record: stop rather than
                # process a partial set
                break
            # data stored as series of two two-byte complex numbers,
            # one for each polarization
            raw = np.frombuffer(data, dtype=np.int8).reshape(-1, 2, 2)

            # use view for fast conversion from float to complex
            vals = raw.astype(np.float32).view(np.complex64).squeeze()
            # vals[i_int * i_block, i_pol]
            if coherent:
                fine = fft(vals, axis=0, overwrite_x=True, **_fftargs)
                fine *= dedisperse[:, np.newaxis]
                vals = ifft(fine, axis=0, overwrite_x=True, **_fftargs)

            chan = fft(vals.reshape(-1, nchan, 2), axis=1,
                       overwrite_x=True, **_fftargs)
            # chan[i_int, i_block, i_pol]
            power = np.sum(chan.real**2+chan.imag**2, axis=-1)

            # current sample positions in stream
            isr = j*ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in range(isr[0]//ntw, isr[-1]//ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:, iw] += np.sum(power[isr//ntw == iw],
                                                   axis=0)

            if do_foldspec:
                tsample = dtsample*isr  # times since start

                for k in range(nchan):
                    if coherent:
                        t = tsample  # already dedispersed
                    else:
                        t = tsample - dt[k]  # dedispersed times

                    phase = phasepol(t)  # corresponding PSR phases
                    # builtin int: the np.int alias no longer exists
                    iphase = np.remainder(phase*ngate,
                                          ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k] += np.bincount(iphase, power[:, k], ngate)
                    icount[k] += np.bincount(iphase, None, ngate)

                ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1
                if (j+1)*ntbin//nt > ibin:  # last addition to bin?
                    # get normalised flux in each bin (where any were added)
                    nonzero = icount > 0
                    nfoldspec = np.where(nonzero, foldspec/icount, 0.)
                    # subtract phase average and store
                    nfoldspec -= np.where(nonzero,
                                          np.sum(nfoldspec, 1, keepdims=True) /
                                          np.sum(nonzero, 1, keepdims=True), 0)
                    foldspec2[:, :, ibin] = nfoldspec
                    # reset for next iteration
                    foldspec *= 0
                    icount *= 0

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    if do_foldspec:
        # swap two halfs in frequency, so that freq increases monotonically
        foldspec2 = fftshift(foldspec2, axes=0)

    if do_waterfall:
        # Subtract, per channel, the mean over the cells that were filled.
        # The mask selects the non-zero cells (as for the folded spectrum
        # above); empty cells are left at zero.
        nonzero = waterfall != 0.
        # avoid 0/0 for channels without any filled cell (those are masked)
        nfilled = np.maximum(np.sum(nonzero, 1, keepdims=True), 1)
        waterfall -= np.where(nonzero,
                              np.sum(waterfall, 1, keepdims=True) / nfilled,
                              0.)
        # swap two halfs in frequency, so that freq increases monotonically
        waterfall = fftshift(waterfall, axes=0)

    return foldspec2, waterfall
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, comm=None):
    """FFT GMRT data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : numpy dtype
        way the data are stored in the file.  Only ``np.int8`` is handled
        here (real,imag byte pairs); any other value raises ``KeyError``.
    samplerate : Quantity
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nskip : int
        number of records (nchan*ntint*2 for phased data w/ np.int8 real,imag)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        start of part of the file that is read (i.e., ignoring nhead)
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel'.
        NOTE: no coherent dedispersion is implemented in this function.
        With 'coherent' the sample times are used as they are; every other
        value (including None) applies the per-channel dispersion delay.
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or 'very'
        whether to give some progress information (default: True);
        'very' prints per-step progress as well.
    progress_interval : int
        Ping every progress_interval sets
    comm : MPI communicator (default: None)
        if given, its ``rank`` and ``size`` are used to distribute the
        ``nt`` sets round-robin over the processes.

    Returns
    -------
    foldspec : ndarray of int, shape (nchan, ngate, ntbin)
        summed power per channel, phase gate and time bin (not normalised;
        the original code notes normalisation is "done in gmrt.py").
    icount : ndarray of int, shape (nchan, ngate, ntbin)
        number of samples that went into each foldspec cell.
    waterfall : ndarray, shape (nchan, nt*ntint//ntw)
        power summed in blocks of ntw samples (no mean subtracted).
    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # initialize waterfall
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (simple for ARO: 1 byte/sample)
    # double since we need to get ntint samples after FFT
    itemsize = {np.int8: 2}[dtype]
    recsize = nchan*ntint*itemsize

    if verbose:
        print('Reading from {}'.format(fh1))

    if nskip > 0:
        if verbose:
            print('Skipping {0} {1}-byte records'.format(nskip, recsize))
        if size == 1:
            # with several processes, each one seeks before every read
            fh1.seek(nskip * recsize)

    # builtin int: the np.int alias no longer exists in NumPy
    foldspec = np.zeros((nchan, ngate, ntbin), dtype=int)
    icount = np.zeros((nchan, ngate, ntbin), dtype=int)

    dt1 = (1./samplerate).to(u.s)
    # but include 2*nchan real-valued samples used for each FFT
    # (or, equivalently, real and imag for channels)
    dtsample = nchan * 2 * dt1
    tstart = dt1 * nskip * recsize

    # pre-calculate time delay due to dispersion in coarse channels
    freq = fftshift(fftfreq(nchan, 2.*dt1.value)) * u.Hz
    freq = (fedge - (freq-freq[0])
            if fedge_at_top
            else fedge + (freq-freq[0]))
    dt = (dispersion_delay_constant * dm *
          (1./freq**2 - 1./fref**2)).to(u.s).value

    # Keep j defined for the summary print even if this rank gets no sets
    # (e.g. rank >= nt), which would otherwise raise a NameError.
    j = -1
    for j in range(rank, nt, size):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j+1, nt, (tstart+dtsample*j*ntint).value))   # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            if size > 1:
                fh1.seek((nskip + j) * recsize)
            # data just a series of byte pairs, of real and imag
            raw = fromfile(fh1, dtype, recsize)
        except (EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        # byte pairs -> float32 pairs -> complex64, without an extra copy
        vals = raw.astype(np.float32).view(np.complex64).squeeze()
        chan = vals.reshape(-1, nchan)
        if verbose == 'very':
            print("... power", end="")
        power = chan.real**2+chan.imag**2

        # current sample positions in stream
        isr = j*ntint + np.arange(ntint)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in range(isr[0]//ntw, isr[-1]//ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[isr//ntw == iw],
                                               axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr*dtsample).value  # times since start
            ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                iphase = np.remainder(phase*ngate, ngate).astype(int)
                # sum and count samples by phase bin
                # A weighted bincount is float64; cast explicitly, since an
                # in-place add of floats into the int accumulator is
                # rejected by NumPy's casting rules.
                foldspec[k, :, ibin] += np.bincount(
                    iphase, power[:, k], ngate).astype(foldspec.dtype)
                icount[k, :, ibin] += np.bincount(iphase, None, ngate)

            if verbose == 'very':
                print("... folded", end="")

        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    return foldspec, icount, waterfall
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, comm=None):
    """Fold GMRT data by pulse phase/time and make a waterfall series.

    Each record read from the file is interpreted as interleaved
    (real, imag) pairs and reshaped to rows of ``nchan`` complex values;
    no FFT is performed in this function.  The power in every channel is
    accumulated into a waterfall (channel versus coarse time) and into a
    folded spectrum (channel versus pulse phase versus time bin).

    Parameters
    ----------
    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : numpy dtype
        way the data are stored in the file.  Only ``np.int8`` is
        accepted; anything else raises ``KeyError``.
    samplerate : Quantity
        rate at which samples were originally taken and thus double the
        band width (frequency units; ``1 / samplerate`` is converted to
        seconds)
    fedge : Quantity
        edge of the frequency band (frequency units)
    fedge_at_top : bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels
    nt, ntint : int
        total number nt of sets, each containing ntint samples per
        channel; hence, the total number of samples is nt*ntint
    nskip : int
        number of records (of ``nchan * ntint * 2`` bytes each for
        np.int8 real, imag pairs) to skip at the start of the file
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have
        to be integer fraction of nt)
    dm : Quantity
        dispersion measure of pulsar, used to correct for ism delay
        (column number density).  Its units must combine with
        ``dispersion_delay_constant`` and frequency**-2 to give a time.
    fref : Quantity
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for an array of times in
        seconds.  The times passed in are measured from the start of the
        file, i.e., they include the ``nskip`` skipped records.
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel'.
        NOTE: only the comparison with 'coherent' is made.  For
        'coherent' the sample times are used as they are (the data are
        assumed to be dedispersed already; nothing is done here); for
        every other value, including None, the per-channel dispersion
        delay is subtracted from the sample times before folding.
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or 'very'
        whether to give some progress information (default: True);
        'very' additionally reports every processing step
    progress_interval : int
        Ping every progress_interval sets
    comm : MPI communicator (default: None)
        if given, its ``rank`` and ``size`` attributes are used to let
        each process handle every ``size``-th set

    Returns
    -------
    foldspec : numpy.ndarray of int, shape (nchan, ngate, ntbin)
        summed power per channel, phase gate and time bin (not
        normalised; per the original source that is done in gmrt.py)
    icount : numpy.ndarray of int, shape (nchan, ngate, ntbin)
        number of samples added to each element of ``foldspec``
    waterfall : numpy.ndarray of float, shape (nchan, nt*ntint//ntw)
        summed power per channel and waterfall time bin (not normalised)
    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # initialize waterfall
    nwsize = nt * ntint // ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file: every complex sample is
    # stored as a (real, imag) pair of bytes
    itemsize = {np.int8: 2}[dtype]
    recsize = nchan * ntint * itemsize

    if verbose:
        print('Reading from {}'.format(fh1))

    if nskip > 0:
        if verbose:
            print('Skipping {0} {1}-byte records'.format(nskip, recsize))
        # with more than one process, every read seeks explicitly below
        if size == 1:
            fh1.seek(nskip * recsize)

    # initialize folded spectrum and its sample counts; the builtin int
    # is what the (now removed) np.int alias stood for
    foldspec = np.zeros((nchan, ngate, ntbin), dtype=int)
    icount = np.zeros((nchan, ngate, ntbin), dtype=int)

    dt1 = (1. / samplerate).to(u.s)
    # time between successive samples of a given channel: 2*nchan
    # real-valued samples (or, equivalently, real and imag for nchan
    # channels)
    dtsample = nchan * 2 * dt1
    tstart = dt1 * nskip * recsize

    # pre-calculate time delay due to dispersion in coarse channels
    freq = fftshift(fftfreq(nchan, 2. * dt1.value)) * u.Hz
    freq = (fedge - (freq - freq[0])
            if fedge_at_top
            else fedge + (freq - freq[0]))
    dt = (dispersion_delay_constant * dm *
          (1. / freq**2 - 1. / fref**2)).to(u.s).value

    # offsets of the samples within one set (loop invariant)
    sample_offsets = np.arange(ntint)

    # pre-set so the final report works if this process has no sets to do
    j = -1
    for j in range(rank, nt, size):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j + 1, nt,
                (tstart + dtsample * j * ntint).value))  # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            if size > 1:
                fh1.seek((nskip + j) * recsize)
            # data just a series of byte pairs, of real and imag
            raw = fromfile(fh1, dtype, recsize)
        except (EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        # pair up successive values as (real, imag) of complex samples
        vals = raw.astype(np.float32).view(np.complex64).squeeze()
        chan = vals.reshape(-1, nchan)
        if verbose == 'very':
            print("... power", end="")
        power = chan.real**2 + chan.imag**2

        # current sample positions in stream
        isr = j * ntint + sample_offsets

        if do_waterfall:
            # waterfall bin of every sample in this set
            iwbin = isr // ntw
            # loop over corresponding positions in waterfall
            for iw in range(iwbin[0], iwbin[-1] + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[iwbin == iw], axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr * dtsample).value  # times since start
            ibin = j * ntbin // nt  # bin in the time series: 0..ntbin-1

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # taken to be dedispersed already
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                # np.remainder can return exactly ngate for tiny negative
                # arguments; wrap the integer index so it stays below ngate
                iphase = np.remainder(phase * ngate,
                                      ngate).astype(int) % ngate
                # sum and count samples by phase bin; bincount with weights
                # yields floats, which NumPy refuses to add in place to an
                # integer array, hence the explicit cast
                foldspec[k, :, ibin] += np.bincount(
                    iphase, power[:, k], ngate).astype(foldspec.dtype)
                icount[k, :, ibin] += np.bincount(iphase, None, ngate)

            if verbose == 'very':
                print("... folded", end="")

        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j + 1, nt))

    return foldspec, icount, waterfall