def fold(file1, file2, dtype, fbottom, fwidth, nchan, nt, ntint, nskip,
         ngate, ntbin, ntw, dm, fref, phasepol, coherent=False,
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100):
    """Fold pre-channelized LOFAR data, possibly dedispersing it

    Parameters
    ----------
    file1, file2 : string
        names of the files holding real and imaginary subchannel timeseries
    dtype : numpy dtype
        way the data are stored in the file (normally '>f4')
    fbottom : float
        frequency of the lowest channel (frequency units)
    fwidth : float
        channel width (frequency units, normally 200*u.MHz/1024.)
    nchan : int
        number of frequency channels
    nt, ntint : int
        number nt of sets to use, each containing ntint samples per channel
        (nchan*ntint items are read from each of the two files per set)
    nskip : int
        number of bytes to skip before reading
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to the start of the file that is read (i.e., including nskip)
    coherent : bool
        accepted for interface compatibility; not used by this routine
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets

    Returns
    -------
    foldspec2 : ndarray, shape (nchan, ngate, ntbin)
        folded spectrum, normalised by the number of samples per phase bin
        and with the phase average subtracted
    waterfall : ndarray, shape (nchan, nt*ntint//ntw)
        power summed in blocks of ntw samples, with the mean over the
        filled bins subtracted per channel
    """
    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # # of items to read from file.
    itemsize = np.dtype(dtype).itemsize
    count = nchan*ntint
    if verbose:
        print('Reading from {}\n and {}'.format(file1, file2))

    with open(file1, 'rb', count*itemsize) as fh1, \
            open(file2, 'rb', count*itemsize) as fh2:

        if nskip > 0:
            if verbose:
                print('Skipping {0} bytes'.format(nskip))
            fh1.seek(nskip)
            fh2.seek(nskip)

        # per-time-bin accumulators; reset each time a time bin is completed
        foldspec = np.zeros((nchan, ngate))
        icount = np.zeros((nchan, ngate))

        dtsample = (1./fwidth).to(u.s)
        # convert the skipped bytes to a whole number of time samples first;
        # without the parentheses the floor division would be applied to
        # the time itself rather than to the byte count
        tstart = dtsample * (nskip // itemsize // nchan)

        # pre-calculate time delay due to dispersion in coarse channels
        freq = fbottom + fwidth * np.arange(nchan)
        dt = (dispersion_delay_constant * dm *
              (1./freq**2 - 1./fref**2)).to(u.s).value

        for j in xrange(nt):
            if verbose and j % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j+1, nt, (tstart+dtsample*j*ntint).value))
                # time since start of file

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data stored as series of floats in two files,
                # one for real and one for imaginary
                raw1 = fromfile(fh1, dtype, count).reshape(-1, nchan)
                raw2 = fromfile(fh2, dtype, count).reshape(-1, nchan)
            except(EOFError):
                break

            power = raw1**2 + raw2**2
            # power[#int, #chan]

            # current sample positions in stream
            isr = j*ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in xrange(isr[0]//ntw, isr[-1]//ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:, iw] += np.sum(power[isr//ntw == iw],
                                                   axis=0)

            if do_foldspec:
                tsample = (tstart + dtsample * isr).value  # times since start

                for k in xrange(nchan):
                    t = tsample - dt[k]  # dedispersed times
                    phase = phasepol(t)  # corresponding PSR phases
                    # np.int was only an alias of the builtin int
                    iphase = np.remainder(phase*ngate, ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k] += np.bincount(iphase, power[:, k], ngate)
                    icount[k] += np.bincount(iphase, None, ngate)

                ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1
                if (j+1)*ntbin//nt > ibin:  # last addition to bin?
                    # get normalised flux in each bin (where any were added)
                    nonzero = icount > 0
                    nfoldspec = np.where(nonzero, foldspec/icount, 0.)
                    # subtract phase average and store
                    nfoldspec -= np.where(nonzero,
                                          np.sum(nfoldspec, 1, keepdims=True) /
                                          np.sum(nonzero, 1, keepdims=True), 0)
                    foldspec2[:, :, ibin] = nfoldspec
                    # reset for next iteration
                    foldspec *= 0
                    icount *= 0

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    if do_waterfall:
        # subtract, per channel, the mean over the bins that received data;
        # bins that were never filled are left at zero
        nonzero = waterfall != 0.
        nfilled = np.sum(nonzero, 1, keepdims=True)
        mean = np.sum(waterfall, 1, keepdims=True) / np.maximum(nfilled, 1)
        waterfall -= np.where(nonzero, mean, 0.)

    return foldspec2, waterfall
def fold(file1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nhead, ngate, ntbin, ntw, dm, fref, phasepol,
         coherent=False, do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100):
    """FFT ARO data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    file1 : string
        name of the file holding voltage timeseries
    dtype : numpy dtype or '4bit' or '1bit'
        way the data are stored in the file
        (only np.int8 and '4bit' have a record size defined here)
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nhead : int
        number of bytes to skip before reading (usually 0 for ARO)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of part of the file that is read (i.e., ignoring nhead)
    coherent : bool
        whether to apply the dispersion correction to the Fourier transform
        of each record (True) or as a per-channel time delay (False)
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets

    Returns
    -------
    foldspec2 : ndarray, shape (nchan, ngate, ntbin)
        folded spectrum, normalised by the number of samples per phase bin
        and with the phase average subtracted
    waterfall : ndarray, shape (nchan, nt*ntint//ntw)
        power summed in blocks of ntw samples, with the mean over the
        filled bins subtracted per channel
    """
    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (simple for ARO: 1 byte/sample)
    recsize = nchan*ntint*{np.int8: 2, '4bit': 1}[dtype]
    if verbose:
        print('Reading from {}'.format(file1))

    with open(file1, 'rb', recsize) as fh1:

        if nhead > 0:
            if verbose:
                print('Skipping {0} bytes'.format(nhead))
            fh1.seek(nhead)

        # foldspec accumulates float power sums, so it has to be a float
        # array: adding floats in place to an integer array truncates (old
        # numpy) or raises a casting error (current numpy)
        foldspec = np.zeros((nchan, ngate))
        icount = np.zeros((nchan, ngate), dtype=int)

        dt1 = (1./samplerate).to(u.s)
        if coherent:
            # pre-calculate required turns due to dispersion
            fcoh = (fedge - rfftfreq(nchan*ntint, dt1.value) * u.Hz
                    if fedge_at_top
                    else
                    fedge + rfftfreq(nchan*ntint, dt1.value) * u.Hz)
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1./fref-1./fcoh)**2) * 360. * u.deg
            dedisperse = np.exp(dang.to(u.rad).value * 1j
                                ).conj().astype(np.complex64).view(np.float32)
            # get these back into order r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
            dedisperse = np.hstack([dedisperse[:1], dedisperse[2:-1]])
        else:
            # pre-calculate time delay due to dispersion;
            # [::2] sets frequency channels to numerical recipes ordering
            freq = (fedge - rfftfreq(nchan*2, dt1.value)[::2] * u.Hz
                    if fedge_at_top
                    else
                    fedge + rfftfreq(nchan*2, dt1.value)[::2] * u.Hz)

            dt = (dispersion_delay_constant * dm *
                  (1./freq**2 - 1./fref**2)).to(u.s).value

        # need 2*nchan samples for each FFT
        dtsample = (nchan*2/samplerate).to(u.s).value

        for j in xrange(nt):
            if verbose and (j+1) % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j+1, nt, dtsample*j*ntint))   # equivalent time since start

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data just a series of bytes, each containing one 8 bit or
                # two 4-bit samples (set by dtype in caller)
                raw = fromfile(fh1, dtype, recsize)
            except(EOFError, IOError) as exc:
                print("Hit {}; writing pgm's".format(exc))
                break

            vals = raw.astype(np.float32)
            if coherent:
                # transform the whole record, apply the pre-calculated
                # dispersion factors, and transform back
                fine = rfft(vals, axis=0, overwrite_x=True)
                fine *= dedisperse
                vals = irfft(fine, axis=0, overwrite_x=True)

            chan2 = rfft(vals.reshape(-1, nchan*2), axis=-1,
                         overwrite_x=True)**2
            # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
            # re-order to Num.Rec. format: Re[0], Re[n/2], Re[1], ....
            power = np.hstack((chan2[:, :1]+chan2[:, -1:],
                               chan2[:, 1:-1].reshape(-1, nchan-1, 2).sum(-1)))
            # power[#int, #chan]

            # current sample positions in stream
            isr = j*ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in xrange(isr[0]//ntw, isr[-1]//ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:, iw] += np.sum(power[isr//ntw == iw],
                                                   axis=0)

            if do_foldspec:
                tsample = dtsample*isr  # times since start

                for k in xrange(nchan):
                    if coherent:
                        t = tsample  # already dedispersed
                    else:
                        t = tsample - dt[k]  # dedispersed times

                    phase = phasepol(t)  # corresponding PSR phases
                    # np.int was only an alias of the builtin int
                    iphase = np.remainder(phase*ngate, ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k] += np.bincount(iphase, power[:, k], ngate)
                    icount[k] += np.bincount(iphase, None, ngate)

                ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1
                if (j+1)*ntbin//nt > ibin:  # last addition to bin?
                    # get normalised flux in each bin (where any were added)
                    nonzero = icount > 0
                    nfoldspec = np.where(nonzero, foldspec/icount, 0.)
                    # subtract phase average and store
                    nfoldspec -= np.where(nonzero,
                                          np.sum(nfoldspec, 1, keepdims=True) /
                                          np.sum(nonzero, 1, keepdims=True), 0)
                    foldspec2[:, :, ibin] = nfoldspec
                    # reset for next iteration
                    foldspec *= 0
                    icount *= 0

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    if do_waterfall:
        # subtract, per channel, the mean over the bins that received data;
        # bins that were never filled are left at zero
        nonzero = waterfall != 0.
        nfilled = np.sum(nonzero, 1, keepdims=True)
        mean = np.sum(waterfall, 1, keepdims=True) / np.maximum(nfilled, 1)
        waterfall -= np.where(nonzero, mean, 0.)

    return foldspec2, waterfall
def fold(file1, file2, dtype, fbottom, fwidth, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         coherent=False, do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, comm=None):
    """Fold pre-channelized LOFAR data, possibly dedispersing it

    Parameters
    ----------
    file1, file2 : string
        names of the files holding real and imaginary subchannel timeseries
    dtype : numpy dtype
        way the data are stored in the file (normally '>f4')
    fbottom : float
        frequency of the lowest channel (frequency units)
    fwidth : float
        channel width (frequency units, normally 200*u.MHz/1024.)
    nchan : int
        number of frequency channels
    nt, ntint : int
        number nt of sets to use, each containing ntint samples;
        hence, total # of samples used is nt*ntint for each channel.
    nskip : int
        number of records (nskip*ntint*4*nchan bytes) to skip before reading
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to the start of the file that is read (i.e., including nskip)
    coherent : bool
        Whether to do dispersion coherently within finer channels
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    comm : MPI communicator (default: None)
        if given, this process handles sets rank, rank+size, ...

    Returns
    -------
    foldspec : ndarray, shape (nchan, ngate, ntbin), or None
        power summed per channel, phase bin and time bin (not normalised);
        None if do_foldspec is False
    icount : ndarray, shape (nchan, ngate, ntbin), or None
        number of samples that went into each element of foldspec
    waterfall : ndarray, shape (nchan, nt*ntint//ntw), or None
        power summed in blocks of ntw samples, with the mean over the
        filled bins subtracted per channel; None if do_waterfall is False
    """
    if comm is not None:
        rank = comm.rank
        size = comm.size
    else:
        rank = 0
        size = 1

    def mpilofile(comm, filename):
        # plain-file stand-in for an MPI-aware opener; comm is ignored.
        # The data are binary, so the file has to be opened in 'rb' mode.
        return open(filename, 'rb')

    # initialize folded spectrum and waterfall
    if do_foldspec:
        foldspec = np.zeros((nchan, ngate, ntbin))
        icount = np.zeros((nchan, ngate, ntbin))
    else:
        foldspec = None
        icount = None
    if do_waterfall:
        nwsize = nt*ntint//ntw
        waterfall = np.zeros((nchan, nwsize))
    else:
        waterfall = None

    # # of items to read from file.
    itemsize = np.dtype(dtype).itemsize
    count = nchan*ntint
    if verbose and rank == 0:
        print('Reading from {}\n and {}'.format(file1, file2))

    # a rank that is assigned no sets never enters the loop below;
    # give j a value so that the final report still works
    j = -1

    with mpilofile(comm, file1) as fh1, \
            mpilofile(comm, file2) as fh2:
        if nskip > 0:
            if verbose and rank == 0:
                # nskip counts records, not bytes (see the seeks below)
                print('Skipping {0} records'.format(nskip))
            # if # MPI processes > 1 we seek in for-loop
            if size == 1:
                fh1.seek(nskip * count * itemsize)
                fh2.seek(nskip * count * itemsize)

        dtsample = (1./fwidth).to(u.s)
        tstart = dtsample * nskip * ntint

        # pre-calculate time delay due to dispersion in coarse channels
        freq = fbottom + fwidth*np.arange(nchan)
        dt = (dispersion_delay_constant * dm *
              (1./freq**2 - 1./fref**2)).to(u.s).value

        if coherent:
            # pre-calculate required turns due to dispersion in fine channels
            fcoh = (freq[np.newaxis, :] +
                    fftfreq(ntint, dtsample.value)[:, np.newaxis] * u.Hz)
            # fcoh[fine, channel]
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1./freq - 1./fcoh)**2) * u.cycle
            dedisperse = np.exp(dang.to(u.rad).value * 1j
                                ).conj().astype(np.complex64)

        for j in xrange(rank, nt, size):
            if verbose and j % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j+1, nt, (tstart+dtsample*j*ntint).value))
                # time since start of file

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data stored as series of floats in two files,
                # one for real and one for imaginary
                if size > 1:
                    fh1.seek((nskip + j)*count*itemsize)
                    fh2.seek((nskip + j)*count*itemsize)
                raw1 = fromfile(fh1, dtype, count*itemsize).reshape(-1, nchan)
                raw2 = fromfile(fh2, dtype, count*itemsize).reshape(-1, nchan)
            except(EOFError):
                break

            # int 8 test: round-trip the data through 8-bit integers
            iraw = (raw1*128.).astype(np.int8)
            raw1 = iraw.astype(np.float32)/128.
            iraw = (raw2*128.).astype(np.int8)
            raw2 = iraw.astype(np.float32)/128.

            if coherent:
                chan = raw1 + 1j*raw2
                # vals[#int, #chan]; FT channels to finely spaced grid
                fine = fft(chan, axis=0, overwrite_x=True)
                # fine[#fine, #chan]; correct for dispersion w/i chan
                fine *= dedisperse
                # fine[#fine, #chan]; FT back to channel timeseries
                chan = ifft(fine, axis=0, overwrite_x=True)
                # vals[#int, #chan]
                power = chan.real**2 + chan.imag**2
                # power[#int, #chan]; timeit -> 0.6x shorter than abs(chan)**2
            else:
                power = raw1**2 + raw2**2
                # power[#int, #chan]

            # current sample positions in stream
            isr = j*ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in xrange(isr[0]//ntw, isr[-1]//ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:, iw] += np.sum(power[isr//ntw == iw],
                                                   axis=0)

            if do_foldspec:
                tsample = (tstart + isr*dtsample).value  # times since start
                ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1

                for k in xrange(nchan):
                    t = tsample - dt[k]  # dedispersed times
                    phase = phasepol(t)  # corresponding PSR phases
                    # np.int was only an alias of the builtin int
                    iphase = np.remainder(phase*ngate, ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k, :, ibin] += np.bincount(iphase, power[:, k],
                                                        ngate)
                    icount[k, :, ibin] += np.bincount(iphase, None, ngate)

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    if do_waterfall:
        # subtract, per channel, the mean over the bins that received data;
        # bins that were never filled are left at zero
        nonzero = waterfall != 0.
        nfilled = np.sum(nonzero, 1, keepdims=True)
        mean = np.sum(waterfall, 1, keepdims=True) / np.maximum(nfilled, 1)
        waterfall -= np.where(nonzero, mean, 0.)

    return foldspec, icount, waterfall
def record_read(self, count):
    """Read one record via ``fromfile`` and arrange it by channel.

    ``count`` is handed to ``fromfile`` unchanged, together with this
    object and its ``dtype``.  The result is reshaped into rows of
    ``self.nchan`` entries, after which length-one axes are dropped.
    """
    samples = fromfile(self, self.dtype, count)
    by_channel = samples.reshape(-1, self.nchan)
    return by_channel.squeeze()
def fold(file1, samplerate, fmid, nchan, nt, ntint, nhead, ngate, ntbin, ntw,
         dm, fref, phasepol, coherent=False, do_waterfall=True,
         do_foldspec=True, verbose=True, progress_interval=100):
    """FFT Effelsberg data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    file1 : string
        name of the file holding voltage timeseries
        (opened with gzip if the name contains '.gz')
    samplerate : float
        rate at which samples were originally taken and thus band width
        (frequency units)
    fmid : float
        mid point of the frequency band (frequency units)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*(2*ntint), with each sample containing
        real,imag for two polarisations
    nhead : int
        number of bytes to skip before reading (usually 4096 for Effelsberg)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of part of the file that is read (i.e., ignoring nhead)
    coherent : bool
        Whether to do dispersion coherently within finer channels
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets

    Returns
    -------
    foldspec2 : ndarray, shape (nchan, ngate, ntbin)
        folded spectrum, normalised by the number of samples per phase bin
        and with the phase average subtracted; if do_foldspec is set, the
        channel axis is shifted such that frequency increases monotonically
    waterfall : ndarray, shape (nchan, nt*ntint//ntw)
        power summed in blocks of ntw samples; if do_waterfall is set, the
        mean over the filled bins is subtracted per channel and the channel
        axis is shifted such that frequency increases monotonically
    """
    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (each nchan contains 4 bytes:
    # real,imag for 2 polarisations).
    recsize = 4*nchan*ntint
    if verbose:
        print('Reading from {}'.format(file1))

    # the third positional argument of gzip.open is the compression level,
    # not a buffer size, so the record size is passed only to plain open()
    if '.gz' in file1:
        fh1 = gzip.open(file1, 'rb')
    else:
        fh1 = open(file1, 'rb', recsize)

    with fh1:

        if nhead > 0:
            if verbose:
                print('Skipping {0} bytes'.format(nhead))
            fh1.seek(nhead)

        # per-time-bin accumulators; reset each time a time bin is completed
        foldspec = np.zeros((nchan, ngate))
        icount = np.zeros((nchan, ngate))

        dt1 = (1./samplerate).to(u.s)
        dtsample = nchan * dt1

        # pre-calculate time delay due to dispersion in coarse channels
        freq = fmid + fftfreq(nchan, dt1.value) * u.Hz
        dt = (dispersion_delay_constant * dm *
              (1./freq**2 - 1./fref**2)).to(u.s).value

        if coherent:
            # pre-calculate required turns due to dispersion in fine channels
            fcoh = (freq +
                    fftfreq(ntint, dtsample.value)[:, np.newaxis] * u.Hz)
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1./freq-1./fcoh)**2) * 360. * u.deg
            dedisperse = np.exp(dang.to(u.rad).value * 1j
                                ).conj().astype(np.complex64)

        for j in xrange(nt):
            if verbose and j % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j+1, nt, dtsample.value*j*ntint))   # time since start

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data stored as series of two two-byte complex numbers,
                # one for each polarization
                raw = fromfile(fh1, np.int8, recsize).reshape(-1, 2, 2)
            except(EOFError):
                break

            # use view for fast conversion from float to complex
            vals = raw.astype(np.float32).view(np.complex64).reshape(
                -1, nchan, 2)
            # vals[#int, #block, #pol]
            if coherent:
                # FT to channels, then FT those to finely spaced grid
                fine = fft2(vals, axes=(0, 1), overwrite_x=True)
                # fine[#fine, #chan, #pol]; correct for dispersion w/i chan
                fine *= dedisperse[:, :, np.newaxis]
                # fine[#fine, #chan, #pol]; FT back to channel timeseries
                chan = ifft(fine, axis=0, overwrite_x=True)
                # vals[#int, #block, #pol]
            else:
                # FT channels
                chan = fft(vals, axis=1, overwrite_x=True)
                # chan[#int, #chan, #pol]

            # sum the power of the two polarisations
            power = np.sum(chan.real**2+chan.imag**2, axis=-1)
            # power[#int, #block]

            # current sample positions in stream
            isr = j*ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in xrange(isr[0]//ntw, isr[-1]//ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:, iw] += np.sum(power[isr//ntw == iw],
                                                   axis=0)

            if do_foldspec:
                tsample = dtsample.value * isr  # times since start

                for k in xrange(nchan):
                    t = tsample - dt[k]  # dedispersed times
                    phase = phasepol(t)  # corresponding PSR phases
                    # np.int was only an alias of the builtin int
                    iphase = np.remainder(phase*ngate, ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k] += np.bincount(iphase, power[:, k], ngate)
                    icount[k] += np.bincount(iphase, None, ngate)

                ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1
                if (j+1)*ntbin//nt > ibin:  # last addition to bin?
                    # get normalised flux in each bin (where any were added)
                    nonzero = icount > 0
                    nfoldspec = np.where(nonzero, foldspec/icount, 0.)
                    # subtract phase average and store
                    nfoldspec -= np.where(nonzero,
                                          np.sum(nfoldspec, 1, keepdims=True) /
                                          np.sum(nonzero, 1, keepdims=True), 0)
                    foldspec2[:, :, ibin] = nfoldspec
                    # reset for next iteration
                    foldspec *= 0
                    icount *= 0

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    if do_foldspec:
        # swap two halves in frequency, so that freq increases monotonically
        foldspec2 = fftshift(foldspec2, axes=0)

    if do_waterfall:
        # subtract, per channel, the mean over the bins that received data;
        # bins that were never filled are left at zero
        nonzero = waterfall != 0.
        nfilled = np.sum(nonzero, 1, keepdims=True)
        mean = np.sum(waterfall, 1, keepdims=True) / np.maximum(nfilled, 1)
        waterfall -= np.where(nonzero, mean, 0.)
        # swap two halves in frequency, so that freq increases monotonically
        waterfall = fftshift(waterfall, axes=0)

    return foldspec2, waterfall
def fold(file1, file2, dtype, fbottom, fwidth, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         coherent=False, do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, comm=None):
    """Fold pre-channelized LOFAR data, possibly dedispersing it

    Parameters
    ----------
    file1, file2 : string
        names of the files holding real and imaginary subchannel timeseries
    dtype : numpy dtype
        way the data are stored in the file (normally '>f4')
    fbottom : float
        frequency of the lowest channel (frequency units)
    fwidth : float
        channel width (frequency units, normally 200*u.MHz/1024.)
    nchan : int
        number of frequency channels
    nt, ntint : int
        number nt of sets to use, each containing ntint samples;
        hence, total # of samples used is nt*ntint for each channel.
    nskip : int
        number of records (nskip*ntint*4*nchan bytes) to skip before reading
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to the start of the file that is read (i.e., including nskip)
    coherent : bool
        Whether to do dispersion coherently within finer channels
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    comm : MPI communicator (default: None)
        if given, this process handles sets rank, rank+size, ...

    Returns
    -------
    foldspec : ndarray, shape (nchan, ngate, ntbin), or None
        power summed per channel, phase bin and time bin (not normalised);
        None if do_foldspec is False
    icount : ndarray, shape (nchan, ngate, ntbin), or None
        number of samples that went into each element of foldspec
    waterfall : ndarray, shape (nchan, nt*ntint//ntw), or None
        power summed in blocks of ntw samples, with the mean over the
        filled bins subtracted per channel; None if do_waterfall is False
    """
    if comm is not None:
        rank = comm.rank
        size = comm.size
    else:
        rank = 0
        size = 1

    def mpilofile(comm, filename):
        # plain-file stand-in for an MPI-aware opener; comm is ignored.
        # The data are binary, so the file has to be opened in 'rb' mode.
        return open(filename, 'rb')

    # initialize folded spectrum and waterfall
    if do_foldspec:
        foldspec = np.zeros((nchan, ngate, ntbin))
        icount = np.zeros((nchan, ngate, ntbin))
    else:
        foldspec = None
        icount = None
    if do_waterfall:
        nwsize = nt * ntint // ntw
        waterfall = np.zeros((nchan, nwsize))
    else:
        waterfall = None

    # # of items to read from file.
    itemsize = np.dtype(dtype).itemsize
    count = nchan * ntint
    if verbose and rank == 0:
        print('Reading from {}\n and {}'.format(file1, file2))

    # a rank that is assigned no sets never enters the loop below;
    # give j a value so that the final report still works
    j = -1

    with mpilofile(comm, file1) as fh1, \
            mpilofile(comm, file2) as fh2:
        if nskip > 0:
            if verbose and rank == 0:
                # nskip counts records, not bytes (see the seeks below)
                print('Skipping {0} records'.format(nskip))
            # if # MPI processes > 1 we seek in for-loop
            if size == 1:
                fh1.seek(nskip * count * itemsize)
                fh2.seek(nskip * count * itemsize)

        dtsample = (1. / fwidth).to(u.s)
        tstart = dtsample * nskip * ntint

        # pre-calculate time delay due to dispersion in coarse channels
        freq = fbottom + fwidth * np.arange(nchan)
        dt = (dispersion_delay_constant * dm *
              (1. / freq**2 - 1. / fref**2)).to(u.s).value

        if coherent:
            # pre-calculate required turns due to dispersion in fine channels
            fcoh = (freq[np.newaxis, :] +
                    fftfreq(ntint, dtsample.value)[:, np.newaxis] * u.Hz)
            # fcoh[fine, channel]
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1. / freq - 1. / fcoh)**2) * u.cycle
            dedisperse = np.exp(dang.to(u.rad).value * 1j).conj().astype(
                np.complex64)

        for j in xrange(rank, nt, size):
            if verbose and j % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j + 1, nt, (tstart + dtsample * j * ntint).value))
                # time since start of file

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data stored as series of floats in two files,
                # one for real and one for imaginary
                if size > 1:
                    fh1.seek((nskip + j) * count * itemsize)
                    fh2.seek((nskip + j) * count * itemsize)
                raw1 = fromfile(fh1, dtype,
                                count * itemsize).reshape(-1, nchan)
                raw2 = fromfile(fh2, dtype,
                                count * itemsize).reshape(-1, nchan)
            except (EOFError):
                break

            # int 8 test: round-trip the data through 8-bit integers
            iraw = (raw1 * 128.).astype(np.int8)
            raw1 = iraw.astype(np.float32) / 128.
            iraw = (raw2 * 128.).astype(np.int8)
            raw2 = iraw.astype(np.float32) / 128.

            if coherent:
                chan = raw1 + 1j * raw2
                # vals[#int, #chan]; FT channels to finely spaced grid
                fine = fft(chan, axis=0, overwrite_x=True)
                # fine[#fine, #chan]; correct for dispersion w/i chan
                fine *= dedisperse
                # fine[#fine, #chan]; FT back to channel timeseries
                chan = ifft(fine, axis=0, overwrite_x=True)
                # vals[#int, #chan]
                power = chan.real**2 + chan.imag**2
                # power[#int, #chan]; timeit -> 0.6x shorter than abs(chan)**2
            else:
                power = raw1**2 + raw2**2
                # power[#int, #chan]

            # current sample positions in stream
            isr = j * ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in xrange(isr[0] // ntw, isr[-1] // ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:, iw] += np.sum(power[isr // ntw == iw],
                                                   axis=0)

            if do_foldspec:
                tsample = (tstart + isr * dtsample).value  # times since start
                ibin = j * ntbin // nt  # bin in the time series: 0..ntbin-1

                for k in xrange(nchan):
                    t = tsample - dt[k]  # dedispersed times
                    phase = phasepol(t)  # corresponding PSR phases
                    # np.int was only an alias of the builtin int
                    iphase = np.remainder(phase * ngate, ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k, :, ibin] += np.bincount(iphase, power[:, k],
                                                        ngate)
                    icount[k, :, ibin] += np.bincount(iphase, None, ngate)

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j + 1, nt))

    if do_waterfall:
        # subtract, per channel, the mean over the bins that received data;
        # bins that were never filled are left at zero
        nonzero = waterfall != 0.
        nfilled = np.sum(nonzero, 1, keepdims=True)
        mean = np.sum(waterfall, 1, keepdims=True) / np.maximum(nfilled, 1)
        waterfall -= np.where(nonzero, mean, 0.)

    return foldspec, icount, waterfall
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, comm=None):
    """FFT GMRT data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : numpy dtype or '4bit' or '1bit'
        way the data are stored in the file
        (only np.int8 has a record size defined here)
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nskip : int
        number of records (nchan*ntint*2 for phased data w/ np.int8 real,imag)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of part of the file that is read (i.e., ignoring nhead)
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel'
        (only 'coherent' is treated specially: the per-channel delay is
        then not applied; see the note in the code)
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True);
        the string 'very' gives additional per-step output
    progress_interval : int
        Ping every progress_interval sets
    comm : MPI communicator (default: None)
        if given, this process handles sets rank, rank+size, ...

    Returns
    -------
    foldspec : ndarray of int, shape (nchan, ngate, ntbin)
        power summed per channel, phase bin and time bin (not normalised)
    icount : ndarray of int, shape (nchan, ngate, ntbin)
        number of samples that went into each element of foldspec
    waterfall : ndarray, shape (nchan, nt*ntint//ntw)
        power summed in blocks of ntw samples (not normalised)
    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # initialize waterfall
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file: every channel sample is a
    # (real, imag) pair of int8, i.e., 2 bytes
    itemsize = {np.int8: 2}[dtype]
    recsize = nchan*ntint*itemsize
    if verbose:
        print('Reading from {}'.format(fh1))

    if nskip > 0:
        if verbose:
            print('Skipping {0} {1}-byte records'.format(nskip, recsize))
        # if # MPI processes > 1 we seek in for-loop
        if size == 1:
            fh1.seek(nskip * recsize)

    # np.int was only an alias of the builtin int
    foldspec = np.zeros((nchan, ngate, ntbin), dtype=int)
    icount = np.zeros((nchan, ngate, ntbin), dtype=int)

    dt1 = (1./samplerate).to(u.s)
    # but include 2*nchan real-valued samples used for each FFT
    # (or, equivalently, real and imag for channels)
    dtsample = nchan * 2 * dt1
    tstart = dt1 * nskip * recsize

    # pre-calculate time delay due to dispersion in coarse channels;
    # channel frequencies increase (or decrease) monotonically from fedge
    freq = fftshift(fftfreq(nchan, 2.*dt1.value)) * u.Hz
    freq = (fedge - (freq-freq[0])
            if fedge_at_top
            else
            fedge + (freq-freq[0]))
    dt = (dispersion_delay_constant * dm *
          (1./freq**2 - 1./fref**2)).to(u.s).value

    # NOTE: coherent dedispersion is not implemented in this routine (an
    # earlier draft was disabled), so with dedisperse='coherent' the data
    # are folded without any dispersion correction.

    # a rank that is assigned no sets never enters the loop below;
    # give j a value so that the final report still works
    j = -1

    for j in xrange(rank, nt, size):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j+1, nt, (tstart+dtsample*j*ntint).value))  # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            if size > 1:
                fh1.seek((nskip + j) * recsize)
            # data just a series of byte pairs, of real and imag
            raw = fromfile(fh1, dtype, recsize)
        except(EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        # view pairs of floats as complex numbers
        vals = raw.astype(np.float32).view(np.complex64).squeeze()
        chan = vals.reshape(-1, nchan)
        if verbose == 'very':
            print("... power", end="")
        power = chan.real**2+chan.imag**2
        # power[#int, #chan]

        # current sample positions in stream
        isr = j*ntint + np.arange(ntint)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in xrange(isr[0]//ntw, isr[-1]//ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[isr//ntw == iw],
                                               axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr*dtsample).value  # times since start
            ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1

            for k in xrange(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # no per-channel delay applied
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                iphase = np.remainder(phase*ngate, ngate).astype(int)
                # sum and count samples by phase bin; bincount with weights
                # returns floats, which have to be cast explicitly since
                # in-place addition of floats to an integer array is
                # rejected by numpy's default 'same_kind' casting
                foldspec[k, :, ibin] += np.bincount(
                    iphase, power[:, k], ngate).astype(foldspec.dtype)
                icount[k, :, ibin] += np.bincount(iphase, None, ngate)

            if verbose == 'very':
                print("... folded", end="")

        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    # normalisation of foldspec and waterfall is done in gmrt.py
    return foldspec, icount, waterfall
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, comm=None):
    """Fold channelized GMRT data by phase/time and make a waterfall series

    Each record read from the file is interpreted as (real, imag) byte
    pairs, reshaped to (ntint, nchan) complex samples and squared to get
    power; no FFT is done here.

    Parameters
    ----------
    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : numpy dtype
        way the data are stored in the file; only np.int8 is supported
        (anything else raises KeyError)
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nskip : int
        number of records (of nchan*ntint*2 bytes each) to skip
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds; the
        times passed in include the duration of the nskip skipped records
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel'
        NOTE(review): no coherent dedispersion is done in this variant,
        yet 'coherent' still skips the per-channel delay correction.
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or 'very'
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    comm : MPI communicator (default: None)
        if given, this process handles sets rank, rank+size, ...

    Returns
    -------
    foldspec : float array, shape (nchan, ngate, ntbin)
        summed power per channel, phase gate and time bin (not normalised)
    icount : int array, shape (nchan, ngate, ntbin)
        number of samples added to each element of foldspec
    waterfall : float array, shape (nchan, nt*ntint//ntw)
        summed power per channel and waterfall time bin (not normalised)
    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # initialize folded spectrum and waterfall
    # foldspec is float since it accumulates weighted bincounts, which are
    # float64; adding those in place to an integer array fails on current
    # numpy.  The stored values are unchanged: power is a sum of squared
    # int8 values and hence integer-valued.
    foldspec = np.zeros((nchan, ngate, ntbin))
    icount = np.zeros((nchan, ngate, ntbin), dtype=np.int64)
    nwsize = nt * ntint // ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file:
    # 2 bytes (real, imag) for each of the nchan*ntint complex samples
    itemsize = {np.int8: 2}[dtype]
    recsize = nchan * ntint * itemsize
    if verbose:
        print('Reading from {}'.format(fh1))

    if nskip > 0:
        if verbose:
            print('Skipping {0} {1}-byte records'.format(nskip, recsize))
        # with more than one process, seeking is done inside the loop
        if size == 1:
            fh1.seek(nskip * recsize)

    dt1 = (1. / samplerate).to(u.s)
    # but include 2*nchan real-valued samples used for each FFT
    # (or, equivalently, real and imag for channels)
    dtsample = nchan * 2 * dt1
    tstart = dt1 * nskip * recsize

    # pre-calculate time delay due to dispersion in coarse channels
    freq = fftshift(fftfreq(nchan, 2. * dt1.value)) * u.Hz
    freq = (fedge - (freq - freq[0])
            if fedge_at_top
            else fedge + (freq - freq[0]))
    dt = (dispersion_delay_constant * dm *
          (1. / freq**2 - 1. / fref**2)).to(u.s).value

    nlast = 0  # one past the index of the last set attempted
    for j in range(rank, nt, size):
        nlast = j + 1
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j + 1, nt,
                (tstart + dtsample * j * ntint).value))  # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            if size > 1:
                fh1.seek((nskip + j) * recsize)
            # data just a series of byte pairs, of real and imag
            raw = fromfile(fh1, dtype, recsize)
        except (EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        vals = raw.astype(np.float32).view(np.complex64).squeeze()
        chan = vals.reshape(-1, nchan)
        if verbose == 'very':
            print("... power", end="")
        power = chan.real**2 + chan.imag**2

        # current sample positions in stream
        isr = j * ntint + np.arange(ntint)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in range(isr[0] // ntw, isr[-1] // ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[isr // ntw == iw],
                                               axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr * dtsample).value  # times since start
            ibin = j * ntbin // nt  # bin in the time series: 0..ntbin-1

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # treated as already dedispersed
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                # float remainder can round up to exactly ngate for tiny
                # negative phases; wrap that back to gate 0
                iphase = (np.remainder(phase * ngate, ngate).astype(int)
                          % ngate)
                # sum and count samples by phase bin
                foldspec[k, :, ibin] += np.bincount(iphase, power[:, k],
                                                    ngate)
                icount[k, :, ibin] += np.bincount(iphase, None, ngate)

            if verbose == 'very':
                print("... folded", end="")

        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(nlast, nt))

    # normalisation of foldspec and waterfall is left to the caller
    return foldspec, icount, waterfall
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nhead, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100):
    """FFT ARO data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : np.int8 or '4bit'
        way the data are stored in the file (anything else raises KeyError)
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nhead : int
        number of bytes to skip before reading (usually 0 for ARO)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative
        to start of part of the file that is read (i.e., ignoring nhead)
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel'
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or 'very'
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets

    Returns
    -------
    foldspec2 : float array, shape (nchan, ngate, ntbin)
        folded spectrum: mean power per filled gate, with the average over
        filled gates subtracted for each channel and time bin
    waterfall : float array, shape (nchan, nt*ntint//ntw)
        summed power per channel and waterfall time bin; if do_waterfall,
        the per-channel mean over filled bins is subtracted
    """
    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt * ntint // ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (simple for ARO: 1 byte/sample)
    # double since we need to get ntint samples after FFT
    recsize = nchan * ntint * {np.int8: 2, '4bit': 1}[dtype]
    if verbose:
        print('Reading from {}'.format(fh1))

    if nhead > 0:
        if verbose:
            print('Skipping {0} bytes'.format(nhead))
        fh1.seek(nhead)

    # accumulators for the current time bin.  foldspec has to be float:
    # it sums (non-integer) power, and adding float bincounts in place to
    # an integer array truncates on old numpy and fails on current numpy.
    foldspec = np.zeros((nchan, ngate))
    icount = np.zeros((nchan, ngate), dtype=np.int64)

    dt1 = (1. / samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    dtsample = nchan * 2 * dt1

    # pre-calculate time delay due to dispersion in coarse channels
    freq = (fedge - rfftfreq(nchan * 2, dt1.value) * u.Hz
            if fedge_at_top
            else fedge + rfftfreq(nchan * 2, dt1.value) * u.Hz)
    # [::2] sets frequency channels to numerical recipes ordering
    dt = (dispersion_delay_constant * dm *
          (1. / freq[::2]**2 - 1. / fref**2)).to(u.s).value

    if dedisperse in {'coherent', 'by-channel'}:
        # pre-calculate required turns due to dispersion
        fcoh = (fedge - rfftfreq(nchan * 2 * ntint, dt1.value) * u.Hz
                if fedge_at_top
                else fedge + rfftfreq(nchan * 2 * ntint, dt1.value) * u.Hz)
        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            _fref = np.repeat(freq.value, ntint) * freq.unit
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy)
        dang = (dispersion_delay_constant * dm * fcoh *
                (1. / _fref - 1. / fcoh)**2) * 360. * u.deg
        # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
        # for 0 and n need only real part, but for 1...n-1 need real, imag
        # so just get shifts for r[1], r[2], ..., r[n-1]
        dang = dang.to(u.rad).value[1:-1:2]
        dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

    nlast = 0  # one past the index of the last set attempted
    for j in range(nt):
        nlast = j + 1
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j + 1, nt, dtsample.value * j * ntint))  # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            # data just a series of bytes, each containing one 8 bit or
            # two 4-bit samples (set by dtype in caller)
            raw = fromfile(fh1, dtype, recsize)
        except (EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        vals = raw.astype(np.float32)
        if dedisperse in {'coherent', 'by-channel'}:
            fine = rfft(vals, axis=0, overwrite_x=True, **_fftargs)
            fine_cmplx = fine[1:-1].view(np.complex64)
            fine_cmplx *= dd_coh  # this overwrites parts of fine, as intended
            vals = irfft(fine, axis=0, overwrite_x=True, **_fftargs)
            if verbose == 'very':
                print("... dedispersed", end="")

        chan2 = rfft(vals.reshape(-1, nchan * 2), axis=-1,
                     overwrite_x=True, **_fftargs)**2
        # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
        # re-order to Num.Rec. format: Re[0], Re[n/2], Re[1], ....
        power = np.hstack((chan2[:, :1] + chan2[:, -1:],
                           chan2[:, 1:-1].reshape(-1, nchan - 1, 2).sum(-1)))
        if verbose == 'very':
            print("... power", end="")

        # current sample positions in stream
        isr = j * ntint + np.arange(ntint)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in range(isr[0] // ntw, isr[-1] // ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[isr // ntw == iw],
                                               axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = dtsample.value * isr  # times since start

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                # float remainder can round up to exactly ngate for tiny
                # negative phases; wrap that back to gate 0
                iphase = (np.remainder(phase * ngate, ngate).astype(int)
                          % ngate)
                # sum and count samples by phase bin
                foldspec[k] += np.bincount(iphase, power[:, k], ngate)
                icount[k] += np.bincount(iphase, None, ngate)

            if verbose == 'very':
                print("... folded", end="")

            ibin = j * ntbin // nt  # bin in the time series: 0..ntbin-1
            if (j + 1) * ntbin // nt > ibin:  # last addition to bin?
                # get normalised flux in each bin (where any were added);
                # empty gates hold 0, so dividing by max(icount, 1) leaves
                # them at 0 without triggering 0/0 warnings
                nonzero = icount > 0
                nfoldspec = foldspec / np.maximum(icount, 1)
                # subtract phase average (over filled gates) and store
                nfilled = np.maximum(np.sum(nonzero, 1, keepdims=True), 1)
                nfoldspec -= np.where(
                    nonzero,
                    np.sum(nfoldspec, 1, keepdims=True) / nfilled, 0)
                foldspec2[:, :, ibin] = nfoldspec
                # reset for next iteration
                foldspec[...] = 0
                icount[...] = 0
                if verbose == 'very':
                    print("... added", end="")
        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(nlast, nt))

    if do_waterfall:
        # subtract the per-channel mean over *filled* waterfall bins.
        # (The mask used to be `waterfall == 0.`, i.e. inverted, so that
        # filled bins were never mean-subtracted.)
        nonzero = waterfall != 0.
        nfilled = np.maximum(np.sum(nonzero, 1, keepdims=True), 1)
        waterfall -= np.where(
            nonzero,
            np.sum(waterfall, 1, keepdims=True) / nfilled, 0.)

    return foldspec2, waterfall
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, rfi_filter_raw=None, rfi_filter_power=None,
         comm=None):
    """FFT ARO data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : np.int8 or '4bit'
        way the data are stored in the file (anything else raises KeyError)
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nskip : int
        number of records to skip; a record is nchan*ntint*2 bytes for
        np.int8 and nchan*ntint bytes for '4bit'
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall
        (does not have to be integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds; the
        times passed in include the duration of the nskip skipped records
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel'
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or 'very'
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    rfi_filter_raw : callable, optional
        if given, called on each raw record; its return value replaces
        the record before conversion to float
    rfi_filter_power : callable, optional
        if given, called on each power array; its return value replaces
        the power before it is added to waterfall and folded spectrum.
        Samples with power exactly 0 are not counted in icount.
    comm : MPI communicator (default None)
        if given, this process handles sets rank, rank+size, ...

    Returns
    -------
    foldspec : float array, shape (nchan, ngate, ntbin)
        summed power per channel, phase gate and time bin (not normalised)
    icount : int array, shape (nchan, ngate, ntbin)
        number of non-zero power samples added to each element of foldspec
    waterfall : float array, shape (nchan, nt*ntint//ntw)
        summed power per channel and waterfall time bin (not normalised)
    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # initialize folded spectrum and waterfall
    foldspec = np.zeros((nchan, ngate, ntbin))
    icount = np.zeros((nchan, ngate, ntbin), dtype=np.int64)
    nwsize = nt * ntint // ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (simple for ARO: 1 byte/sample)
    # double since we need to get ntint samples after FFT
    recsize = nchan * ntint * {np.int8: 2, '4bit': 1}[dtype]
    if verbose:
        print('Reading from {}'.format(fh1))

    if nskip > 0:
        if verbose:
            print('Skipping {0} records = {1} bytes'
                  .format(nskip, nskip*recsize))
        # If MPI threading, the threads hop over one-another
        # and seeking is done in for-loop.
        if size == 1:
            fh1.seek(nskip * recsize)

    dt1 = (1. / samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    dtsample = nchan * 2 * dt1
    tstart = dtsample * ntint * nskip

    # pre-calculate time delay due to dispersion in coarse channels
    freq = (fedge - rfftfreq(nchan * 2, dt1.value) * u.Hz
            if fedge_at_top
            else fedge + rfftfreq(nchan * 2, dt1.value) * u.Hz)
    # [::2] sets frequency channels to numerical recipes ordering
    dt = (dispersion_delay_constant * dm *
          (1. / freq[::2]**2 - 1. / fref**2)).to(u.s).value

    if dedisperse in {'coherent', 'by-channel'}:
        # pre-calculate required turns due to dispersion
        fcoh = (fedge - rfftfreq(nchan * 2 * ntint, dt1.value) * u.Hz
                if fedge_at_top
                else fedge + rfftfreq(nchan * 2 * ntint, dt1.value) * u.Hz)
        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            _fref = np.repeat(freq.value, ntint) * freq.unit
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy)
        dang = (dispersion_delay_constant * dm * fcoh *
                (1. / _fref - 1. / fcoh)**2) * 360. * u.deg
        # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
        # for 0 and n need only real part, but for 1...n-1 need real, imag
        # so just get shifts for r[1], r[2], ..., r[n-1]
        dang = dang.to(u.rad).value[1:-1:2]
        dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

    nlast = 0  # one past the index of the last set attempted
    for j in range(rank, nt, size):
        nlast = j + 1
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j+1, nt, (tstart+dtsample*j*ntint).value))  # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            # data just a series of bytes, each containing one 8 bit or
            # two 4-bit samples (set by dtype in caller)
            if size > 1:
                fh1.seek((j + nskip) * recsize)
            raw = fromfile(fh1, dtype, recsize)
        except (EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        if rfi_filter_raw:
            raw = rfi_filter_raw(raw)
            # only report in 'very' mode, like all other progress pieces
            if verbose == 'very':
                print("... raw RFI", end="")

        vals = raw.astype(np.float32)
        if dedisperse in {'coherent', 'by-channel'}:
            fine = rfft(vals, axis=0, overwrite_x=True, **_fftargs)
            fine_cmplx = fine[1:-1].view(np.complex64)
            fine_cmplx *= dd_coh  # this overwrites parts of fine, as intended
            vals = irfft(fine, axis=0, overwrite_x=True, **_fftargs)
            if verbose == 'very':
                print("... dedispersed", end="")

        chan2 = rfft(vals.reshape(-1, nchan * 2), axis=-1,
                     overwrite_x=True, **_fftargs)**2
        # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
        # re-order to Num.Rec. format: Re[0], Re[n/2], Re[1], ....
        power = np.hstack((chan2[:, :1] + chan2[:, -1:],
                           chan2[:, 1:-1].reshape(-1, nchan - 1, 2).sum(-1)))
        if verbose == 'very':
            print("... power", end="")

        if rfi_filter_power:
            power = rfi_filter_power(power)
            if verbose == 'very':
                print("... power RFI", end="")

        # current sample positions in stream
        isr = j * ntint + np.arange(ntint)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in range(isr[0] // ntw, isr[-1] // ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[isr // ntw == iw],
                                               axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr * dtsample).value  # times since start
            ibin = j * ntbin // nt  # bin in the time series: 0..ntbin-1

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                # float remainder can round up to exactly ngate for tiny
                # negative phases; wrap that back to gate 0
                iphase = (np.remainder(phase * ngate, ngate).astype(int)
                          % ngate)
                # sum samples by phase bin
                foldspec[k, :, ibin] += np.bincount(iphase, power[:, k],
                                                    ngate)
                # count only samples with non-zero power.  Select them
                # before counting, so that bincount returns integers;
                # using the boolean mask as weights yields float64, which
                # cannot be added in place to the integer icount.
                good = power[:, k] != 0.
                icount[k, :, ibin] += np.bincount(iphase[good],
                                                  minlength=ngate)

            if verbose == 'very':
                print("... folded", end="")

        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(nlast, nt))

    return foldspec, icount, waterfall