Example #1
    def _fourier_cross(self, lc1, lc2):
        """
        Fourier transform the two light curves, then compute the cross spectrum.
        Computed as CS = lc1 x lc2* (where lc2 is the one that gets
        complex-conjugated)

        Parameters
        ----------
        lc1: :class:`stingray.Lightcurve` object
            One light curve to be Fourier transformed. This is the band of
            interest or channel of interest.

        lc2: :class:`stingray.Lightcurve` object
            Another light curve to be Fourier transformed.
            This is the reference band.

        Returns
        -------
        freqs: numpy.ndarray
            The positive Fourier frequencies

        cross: numpy.ndarray
            The (complex, unnormalized) cross spectrum at the positive frequencies

        """
        fourier_1 = fft(lc1.counts)  # do Fourier transform 1
        fourier_2 = fft(lc2.counts)  # do Fourier transform 2

        freqs = fftfreq(lc1.n, lc1.dt)
        cross = np.multiply(fourier_1[freqs > 0],
                            np.conj(fourier_2[freqs > 0]))

        return freqs[freqs > 0], cross
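
The same cross spectrum can be reproduced with plain numpy.fft calls on the count arrays. Below is a minimal, self-contained sketch (not part of the original class): it assumes two equal-length, evenly sampled count series and a bin size dt instead of stingray.Lightcurve objects.

import numpy as np

def cross_spectrum(counts1, counts2, dt):
    # CS = FT(counts1) * conj(FT(counts2)), kept only at positive frequencies
    fourier_1 = np.fft.fft(counts1)
    fourier_2 = np.fft.fft(counts2)
    freqs = np.fft.fftfreq(len(counts1), dt)
    pos = freqs > 0
    return freqs[pos], fourier_1[pos] * np.conj(fourier_2[pos])

# toy usage: two correlated noisy sinusoids sampled every 0.01 s
t = np.arange(1024) * 0.01
c1 = 100 + 10 * np.sin(2 * np.pi * 5 * t) + np.random.poisson(5, t.size)
c2 = 100 + 10 * np.sin(2 * np.pi * 5 * t + 0.3) + np.random.poisson(5, t.size)
f, cs = cross_spectrum(c1, c2, 0.01)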
Example #2
    def get_fft_freq(self, freq, ntint, dm):
        dtsample = 2 * self.nfreq * self.dt

        fcoh = freq - fftfreq(ntint, dtsample)[:, np.newaxis]

        _fref = freq[np.newaxis]

        dang = (self.dispersion_delay_constant * dm * fcoh *
                (1. / _fref - 1. / fcoh)**2)

        dd_coh = np.exp(-1j * dang).astype(np.complex64)

        return dd_coh
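
The get_fft_freq example above builds the phase factors for coherent dedispersion: for every fine FFT frequency inside a coarse channel it evaluates the dispersive phase relative to the channel centre and returns the complex rotation that is multiplied into the fine-channel spectrum before the inverse FFT (as in the fold/correlate examples further down). A rough standalone sketch of the same calculation follows; it assumes frequencies in Hz, DM in pc cm^-3, and the standard dispersion constant, and replaces the class attributes (self.nfreq, self.dt, self.dispersion_delay_constant) with plain arguments.

import numpy as np
from numpy.fft import fftfreq

# standard dispersion constant, here taken as ~4.148808e15 s Hz^2 cm^3 / pc
D = 4.148808e15

def dedispersion_phasor(freq_hz, ntint, dtsample_s, dm):
    # fine frequency of every FFT bin inside each coarse channel
    fcoh = freq_hz - fftfreq(ntint, dtsample_s)[:, np.newaxis]
    _fref = freq_hz[np.newaxis]  # reference: the channel centres themselves
    # dispersive phase turns relative to the channel centre
    # (cf. eq. 5.21 ff. in Lorimer & Kramer, Handbook of Pulsar Astronomy)
    turns = D * dm * fcoh * (1. / _fref - 1. / fcoh) ** 2
    return np.exp(-2j * np.pi * turns).astype(np.complex64)

# toy call: three channels near 400 MHz, 1024-point transform, 1 ms samples
phasor = dedispersion_phasor(np.array([400e6, 410e6, 420e6]), 1024, 1e-3, dm=30.)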
Example #4
def fold(fh,
         comm,
         samplerate,
         fedge,
         fedge_at_top,
         nchan,
         nt,
         ntint,
         ngate,
         ntbin,
         ntw,
         dm,
         fref,
         phasepol,
         dedisperse='incoherent',
         do_waterfall=True,
         do_foldspec=True,
         verbose=True,
         progress_interval=100,
         rfi_filter_raw=None,
         rfi_filter_power=None,
         return_fits=False):
    """
    FFT data, fold by phase/time and make a waterfall series

    Folding is done from the position the file is currently in

    Parameters
    ----------
    fh : file handle
        handle to file holding voltage timeseries
    comm: MPI communicator or None
        will use size, rank attributes
    samplerate : Quantity
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets in each file, each containing ntint samples;
        hence, the total number of samples is nt*ntint, with each sample
        containing a single polarisation
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        start of the file that is read.
    dedisperse : None or string (default: incoherent).
        None, 'incoherent', 'coherent', 'by-channel'.
        Note: None really does nothing
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or int
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    return_fits : bool (default: False)
        return a subint fits table for rank == 0 (None otherwise)

    """
    assert dedisperse in (None, 'incoherent', 'by-channel', 'coherent')
    assert nchan % fh.nchan == 0
    if dedisperse == 'by-channel':
        oversample = nchan // fh.nchan
        assert ntint % oversample == 0
    else:
        oversample = 1

    if dedisperse == 'coherent' and fh.nchan > 1:
        raise ValueError("For coherent dedispersion, data must be "
                         "unchannelized before folding.")

    if comm is None:
        mpi_rank = 0
        mpi_size = 1
    else:
        mpi_rank = comm.rank
        mpi_size = comm.size

    npol = getattr(fh, 'npol', 1)
    assert npol == 1 or npol == 2
    if verbose > 1 and mpi_rank == 0:
        print("Number of polarisations={}".format(npol))

    # initialize folded spectrum and waterfall
    # TODO: use estimated number of points to set dtype
    if do_foldspec:
        foldspec = np.zeros((ntbin, nchan, ngate, npol**2), dtype=np.float32)
        icount = np.zeros((ntbin, nchan, ngate), dtype=np.int32)
    else:
        foldspec = None
        icount = None

    if do_waterfall:
        nwsize = nt * ntint // ntw
        waterfall = np.zeros((nwsize, nchan, npol**2), dtype=np.float64)
    else:
        waterfall = None

    if verbose and mpi_rank == 0:
        print('Reading from {}'.format(fh))

    nskip = fh.tell() / fh.blocksize
    if nskip > 0:
        if verbose and mpi_rank == 0:
            print('Starting {0} blocks = {1} bytes out from start.'.format(
                nskip, nskip * fh.blocksize))

    dt1 = (1. / samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    if fh.telescope == 'lofar':
        dtsample = fh.dtsample
    else:
        dtsample = nchan // oversample * 2 * dt1
    tstart = dtsample * ntint * nskip

    # pre-calculate time delay due to dispersion in coarse channels
    # for channelized data, frequencies are known

    if fh.nchan == 1:
        if getattr(fh, 'data_is_complex', False):
            # for complex data, really each complex sample consists of
            # 2 real ones, so multiply dt1 by 2.
            if fedge_at_top:
                freq = fedge - fftfreq(nchan, 2. * dt1.value) * u.Hz
            else:
                freq = fedge + fftfreq(nchan, 2. * dt1.value) * u.Hz
        else:
            if fedge_at_top:
                freq = fedge - rfftfreq(nchan * 2, dt1.value)[::2] * u.Hz
            else:
                freq = fedge + rfftfreq(nchan * 2, dt1.value)[::2] * u.Hz
        freq_in = freq
    else:
        # input frequencies may not be the ones going out
        freq_in = fh.frequencies
        if oversample == 1:
            freq = freq_in
        else:
            if fedge_at_top:
                freq = (freq_in[:, np.newaxis] -
                        u.Hz * fftfreq(oversample, dtsample.value))
            else:
                freq = (freq_in[:, np.newaxis] +
                        u.Hz * fftfreq(oversample, dtsample.value))
    ifreq = freq.ravel().argsort()

    # pre-calculate time offsets in (input) channelized streams
    dt = dispersion_delay_constant * dm * (1. / freq_in**2 - 1. / fref**2)

    if dedisperse in ['coherent', 'by-channel']:
        # pre-calculate required turns due to dispersion
        if fedge_at_top:
            fcoh = (freq_in[np.newaxis, :] -
                    u.Hz * fftfreq(ntint, dtsample.value)[:, np.newaxis])
        else:
            fcoh = (freq_in[np.newaxis, :] +
                    u.Hz * fftfreq(ntint, dtsample.value)[:, np.newaxis])

        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            _fref = freq_in[np.newaxis, :]
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy)
        dang = (dispersion_delay_constant * dm * fcoh *
                (1. / _fref - 1. / fcoh)**2) * u.cycle

        with u.set_enabled_equivalencies(u.dimensionless_angles()):
            dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

        # add dimension for polarisation
        dd_coh = dd_coh[..., np.newaxis]

    # Calculate the part of the whole file this node should handle.
    size_per_node = (nt - 1) // mpi_size + 1
    start_block = mpi_rank * size_per_node
    end_block = min((mpi_rank + 1) * size_per_node, nt)
    for j in range(start_block, end_block):
        if verbose and j % progress_interval == 0:
            print('#{:4d}/{:4d} is doing {:6d}/{:6d} [={:6d}/{:6d}]; '
                  'time={:18.12f}'.format(
                      mpi_rank, mpi_size, j + 1, nt, j - start_block + 1,
                      end_block - start_block,
                      (tstart +
                       dtsample * j * ntint).value))  # time since start

        # Just in case numbers were set wrong -- break if file ends;
        # better keep at least the work done.
        try:
            raw = fh.seek_record_read(int((nskip + j) * fh.blocksize),
                                      fh.blocksize)
        except (EOFError, IOError) as exc:
            print("Hit {0!r}; writing data collected.".format(exc))
            break
        if verbose >= 2:
            print("#{:4d}/{:4d} read {} items".format(mpi_rank, mpi_size,
                                                      raw.size),
                  end="")

        if npol == 2:  # multiple polarisations
            raw = raw.view(raw.dtype.fields.values()[0][0])

        if fh.nchan == 1:  # raw.shape=(ntint*npol)
            raw = raw.reshape(-1, npol)
        else:  # raw.shape=(ntint, nchan*npol)
            raw = raw.reshape(-1, fh.nchan, npol)

        if rfi_filter_raw is not None:
            raw, ok = rfi_filter_raw(raw)
            if verbose >= 2:
                print("... raw RFI (zap {0}/{1})".format(
                    np.count_nonzero(~ok), ok.size),
                      end="")

        if np.can_cast(raw.dtype, np.float32):
            vals = raw.astype(np.float32)
        else:
            assert raw.dtype.kind == 'c'
            vals = raw

        if fh.nchan == 1:
            # have real-valued time stream of complex baseband
            # if we need some coherent dedispersion, do FT of the whole thing,
            # otherwise FT directly to the output channels
            if raw.dtype.kind == 'c':
                ftchan = nchan if dedisperse == 'incoherent' else len(vals)
                vals = fft(vals.reshape(-1, ftchan, npol),
                           axis=1,
                           overwrite_x=True,
                           **_fftargs)
            else:  # real data
                ftchan = nchan if dedisperse == 'incoherent' else len(
                    vals) // 2
                vals = rfft(vals.reshape(-1, ftchan * 2, npol),
                            axis=1,
                            overwrite_x=True,
                            **_fftargs)
                # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
                # re-order to normal fft format (like Numerical Recipes):
                # Re[0], Re[n/2], Re[1], Im[1], .... (channel 0 is junk anyway)
                vals = np.hstack(
                    (vals[:, 0], vals[:, -1], vals[:,
                                                   1:-1])).view(np.complex64)
            # for incoherent, vals.shape=(ntint, nchan, npol) -> OK
            # for others, have           (1, ntint*nchan, npol)
            # reshape(nchan, ntint) makes the channel the slowly varying axis -> .T
            if dedisperse != 'incoherent':
                fine = vals.reshape(nchan, -1, npol).transpose(1, 0, 2)
                # now have fine.shape=(ntint, nchan, npol)

        else:  # data already channelized
            if dedisperse == 'by-channel':
                fine = fft(vals, axis=0, overwrite_x=True, **_fftargs)
                # have fine.shape=(ntint, fh.nchan, npol)

        if dedisperse in ['coherent', 'by-channel']:
            fine *= dd_coh
            # rechannelize to output channels
            if oversample > 1 and dedisperse == 'by-channel':
                # fine.shape=(ntint*oversample, chan_in, npol)
                #           =(coarse,fine,fh.chan, npol)
                #  -> reshape(oversample, ntint, fh.nchan, npol)
                # want (ntint=fine, fh.nchan, oversample, npol) -> .transpose
                fine = (fine.reshape(oversample, -1, fh.nchan, npol).transpose(
                    1, 2, 0, 3).reshape(-1, nchan, npol))
            # now, for both,     fine.shape=(ntint, nchan, npol)
            vals = ifft(fine, axis=0, overwrite_x=True, **_fftargs)
            # vals[time, chan, pol]
            if verbose >= 2:
                print("... dedispersed", end="")

        if npol == 1:
            power = vals.real**2 + vals.imag**2
        else:
            p0 = vals[..., 0]
            p1 = vals[..., 1]
            power = np.empty(vals.shape[:-1] + (4, ), np.float32)
            power[..., 0] = p0.real**2 + p0.imag**2
            power[..., 1] = p0.real * p1.real + p0.imag * p1.imag
            power[..., 2] = p0.imag * p1.real - p0.real * p1.imag
            power[..., 3] = p1.real**2 + p1.imag**2

        if verbose >= 2:
            print("... power", end="")

        if rfi_filter_power is not None:
            power = rfi_filter_power(power)
            print("... power RFI", end="")

        # current sample positions in stream
        isr = j * (ntint // oversample) + np.arange(ntint // oversample)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in xrange(isr[0] // ntw, isr[-1] // ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[iw, :] += np.sum(power[isr // ntw == iw],
                                               axis=0)[ifreq]
            if verbose >= 2:
                print("... waterfall", end="")

        if do_foldspec:
            ibin = (j * ntbin) // nt  # bin in the time series: 0..ntbin-1

            # times since start
            tsample = (tstart + isr * dtsample * oversample)[:, np.newaxis]
            # correct for delay if needed
            if dedisperse in ['incoherent', 'by-channel']:
                # tsample.shape=(ntint/oversample, nchan_in)
                tsample = tsample - dt

            phase = (phasepol(tsample.to(u.s).value.ravel()).reshape(
                tsample.shape))
            # corresponding PSR phases
            iphase = np.remainder(phase * ngate, ngate).astype(np.int)

            for k, kfreq in enumerate(ifreq):  # sort in frequency while at it
                iph = iphase[:, (0 if iphase.shape[1] == 1 else kfreq //
                                 oversample)]
                # sum and count samples by phase bin
                for ipow in xrange(npol**2):
                    foldspec[ibin, k, :,
                             ipow] += np.bincount(iph, power[:, kfreq, ipow],
                                                  ngate)
                icount[ibin,
                       k, :] += np.bincount(iph, power[:, kfreq, 0] != 0.,
                                            ngate)

            if verbose >= 2:
                print("... folded", end="")

        if verbose >= 2:
            print("... done")

    #Commented out as workaround, this was causing "Referenced before assignment" errors with JB data
    #if verbose >= 2 or verbose and mpi_rank == 0:
    #    print('#{:4d}/{:4d} read {:6d} out of {:6d}'
    #          .format(mpi_rank, mpi_size, j+1, nt))

    if npol == 1:
        if do_foldspec:
            foldspec = foldspec.reshape(foldspec.shape[:-1])
        if do_waterfall:
            waterfall = waterfall.reshape(waterfall.shape[:-1])

    return foldspec, icount, waterfall
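
The heart of the folding loop in fold() above is the np.bincount step: every sample's pulse phase is mapped onto one of ngate phase bins, and power and sample counts are accumulated per bin and per channel. The toy sketch below isolates just that step, with a made-up linear phase model standing in for phasepol and arbitrary array sizes.

import numpy as np

ntint, nchan, ngate = 1000, 4, 16
power = np.random.rand(ntint, nchan).astype(np.float32)
tsample = np.arange(ntint) * 1e-3           # seconds since start
phase = 2.5 * tsample                       # stand-in for phasepol(tsample)
iphase = np.remainder(phase * ngate, ngate).astype(int)

foldspec = np.zeros((nchan, ngate))
icount = np.zeros((nchan, ngate), dtype=np.int64)
for k in range(nchan):
    # sum power per phase bin and count the contributing samples
    foldspec[k] += np.bincount(iphase, power[:, k], minlength=ngate)
    icount[k] += np.bincount(iphase[power[:, k] != 0.], minlength=ngate)

# average pulse profile: summed power divided by number of samples per bin
profile = foldspec.sum(0) / np.maximum(icount.sum(0), 1)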
Example #5
def correlate(fh1, fh2, dm, nchan, ngate, ntbin, nt, ntw,
              dedisperse='incoherent', rfi_filter_raw=None, fref=_fref,
              save_xcorr=True, do_foldspec=True, phasepol=None,
              do_waterfall=True,
              t0=None, t1=None, comm=None, verbose=2):
    """
    fh1 : file handle of first data stream
    fh2 : file handle of second data stream
    dm : dispersion measure of the pulsar (to dedisperse to)
    nchan : number of frequency channels
    t0 : start time (isot) of the x-corr
         [None] start at common beginning of (fh1, fh2)
    t1 : end time (isot) of the x-corr
         [None] end at common ending of (fh1, fh2)
    comm : MPI communicator or None

    """
    fhs = [fh1, fh2]
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # find nearest starttime landing on same sample
    if t0 is None:
        t0 = max(fh1.time0, fh2.time0)
        print("Starting at %s" % t0)
    t0 = Time(t0, scale='utc')
    t1 = Time(t1, scale='utc')

    # find time offset between the two fh's, accommodating the relative phase
    # delay of the pulsar (the propagation delay)
    phases = [phasepol[i]((t0 - fhs[i].time0).sec) for i in [0, 1]]
    F0 = np.mean([phasepol[i].deriv(1)((t0 - fhs[i].time0).sec)
                  for i in [0, 1]])
    # propagation delay offset from fh1
    dts = [0. * u.s, np.diff(phases)[0] / F0 * u.s]
    if rank == 0:
        print("Will read fh2 ({0}) {1} ahead of fh1 ({2}) "
              "for propagation delay".format(fh2.telescope,
                                             dts[1].to(u.millisecond),
                                             fh1.telescope))

    # prep the fhs for xcorr stream, setting up channelization, dedispersion...
    for i, fh in enumerate(fhs):
        fh.seek(t0)
        # byte offset for propagation delay
        fh.prop_delay = int(round(dts[i] / fh.dtsample)) * fh.recordsize
        fh.dt1 = (1. / fh.samplerate).to(u.s)
        fh.this_nskip = fh.nskip(t0)
        if rank == 1:
            return None
        # set up FFT functions: real vs complex fft's
        if fh.nchan > 1:
            fh.thisfft = fft
            fh.thisifft = ifft
            fh.thisfftfreq = fftfreq
        else:
            fh.thisfft = rfft
            fh.thisifft = irfft
            fh.thisfftfreq = rfftfreq

        # pre-calculate time delay due to dispersion in coarse channels
        # LOFAR data is already channelized
        if fh.nchan > 1:
            fh.freq = fh.frequencies
        else:
            if fh.fedge_at_top:
                fh.freq = fh.fedge\
                    - fh.thisfftfreq(nchan * 2, fh.dt1.value) * u.Hz
            else:
                fh.freq = fh.fedge\
                    + fh.thisfftfreq(nchan * 2, fh.dt1.value) * u.Hz
            # [::2] sets frequency channels to numerical recipes ordering
            # or, rfft has an unusual ordering
            fh.freq = fh.freq[::2]

        # sort channels from low --> high frequency
        if np.diff(fh.freq.value).mean() < 0.:
            if rank == 0 and verbose > 1:
                print("Will frequency-sort {0} data before x-corr"
                      .format(fh.telescope))
            fh.freqsort = True
        else:
            fh.freqsort = False

        fh.dt = (dispersion_delay_constant * dm *
                 ( 1./fh.freq**2 - 1./fref**2) ).to(u.s).value

        # number of time bins to np.roll the channels for incoherent dedisperse
        if dedisperse == 'incoherent':
            fh.ndt = (fh.dt / fh.dtsample.to(u.s).value)
            fh.ndt = -1 * np.rint(fh.ndt).astype(np.int)

        elif dedisperse in ['coherent', 'by-channel']:
            # pre-calculate required turns due to dispersion
            if fh.nchan > 1:
                fcoh = (fh.freq[np.newaxis, :] + fftfreq(fh.ntint(nchan),
                        fh.dtsample.value)[:, np.newaxis] * u.Hz)
            else:
                if fh.fedge_at_top:
                    fcoh = fh.fedge - fh.thisfftfreq(nchan*2*fh.ntint(nchan),
                                                     fh.dt1.value) * u.Hz
                else:
                    fcoh = fh.fedge + fh.thisfftfreq(nchan*2*fh.ntint(nchan),
                                                     fh.dt1.value) * u.Hz

            #set frequency relative to which dispersion is coherently corrected
            if dedisperse == 'coherent':
                _fref = fref
            else:
                #fref = np.round((fcoh * fh.dtsample).to(1).value)/fh.dtsample
                _fref = np.repeat(fh.freq.value, fh.ntint(nchan))*fh.freq.unit
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1./_fref-1./fcoh)**2) * 360. * u.deg

            if fh.thisfftfreq is rfftfreq:
                # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
                # for 0 and n need only real part, but for 1...n-1 need real, imag
                # so just get shifts for r[1], r[2], ..., r[n-1]
                dang = dang.to(u.rad).value[1:-1:2]
            else:
                dang = dang.to(u.rad).value

            fh.dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)
    #### done fh setup ###

    ## xcorr setup
    # data-reading params (to help read in same-size time chunks and average
    # onto the same time-and-frequency grids)
    (Rf, Tf, NUf, fkeep, freqs, rows) = data_averaging_coeffs(fh1, fh2)
    nrows = int(min(rows[0] * Tf[1] / Tf[0], rows[1] * Tf[0] / Tf[1]))

    # summarize the (re)sampling
    if rank == 0:
        tmp = fh1.dtsample.to(u.s).value * fh1.blocksize / fh1.recordsize*Rf[0]
        print("\nReading {0} blocks of fh1, {1} blocks of fh2, "
              "for equal timeblocks of {2} sec ".format(Rf[0], Rf[1], tmp))

        if rank == 0 and verbose > 1:
            tmp = np.diff(freqs).mean()
            print("Averaging over {0} channels in fh1, {1} in fh2, for equal "
                  "frequency bins of {2} MHz".format(NUf[0], NUf[1], tmp))
            tmp = fh1.dtsample.to(u.s).value*Tf[0]
            print("Averaging over {0} timesteps in fh1, {1} in fh2, for equal "
                  "samples of {2} s\n".format(Tf[0], Tf[1], tmp))

        # check if we are averaging both fh's
        if rank == 0 and np.all(np.array(Tf) != 1):
            txt = "Note, averaging both fh's in time to have similar sample "\
                  "size. You may want to implement interpolation, or think "\
                  "more about this situation"
            print(txt)
        if rank == 0 and np.all(np.array(NUf) != 1):
            txt = "Note, averaging both fh's in freq to have similar sample "\
                  "size. You may want to implement interpolation, or think "\
                  "more about this situation"
            print(txt)

    # initialize the folded spectrum and waterfall
    nchans = min([len(fh.freq[fkeep[i]] / NUf[i]) for i, fh in enumerate(fhs)])
    foldspec = np.zeros((nchans, ngate, ntbin))
    icount = np.zeros((nchans, ngate, ntbin), dtype=np.int64)
    nwsize = min(nt * fh1.ntint(nchan) // ntw, nt * fh2.ntint(nchan) // ntw)
    waterfall = np.zeros((nchans, nwsize))

    if save_xcorr:
        # output dataset
        outname = "{0}{1}_{2}_{3}.hdf5".format(
            fh1.telescope[0], fh2.telescope[0], t0, t1)
        # mpi doesn't like colons
        outname = outname.replace(':', '')
        fcorr = h5py.File(outname, 'w')  # , driver='mpio', comm=comm)
        ## create the x-corr output file
        # save the frequency grids to help with future TODO: interpolate onto
        # same frequency grid. For now the frequencies fall within same bin
        if rank == 0 and verbose:
            print("Saving x-corr to %s\n" % outname)
        fcorr.create_dataset('freqs', data=np.hstack([f.to(u.MHz).value
                                                      for f in freqs]))

        # the x-corr data [tsteps, channels]
        dset = fcorr.create_dataset('corr', (nrows, freqs[0].size),
                                    dtype='complex64', maxshape=(None, nchan))
        dset.attrs.create('dedisperse', data=str(dedisperse))
        dset.attrs.create('tsample',
                          data=[fhs[i].dtsample.to(u.s).value * Tf[i]
                                for i in [0, 1]])
        dset.attrs.create('chanbw', data=np.diff(freqs).mean())

    # start reading the data
    # this_nskip moves to 't0', rank is for MPI
    idx = rank
    raws = [fh.seek_record_read((fh.this_nskip + idx * Rf[i])
                                * fh.blocksize - fh.prop_delay,
                                fh.blocksize * Rf[i])
            for i, fh in enumerate(fhs)]
    endread = False
    print("read step (idx), fh1.time(), fh2.time() ")
    print("\t inclues {0} propagation delay".format(dts[1]))
    while np.all([raw.size > 0 for raw in raws]):
        if verbose:
            print("idx",idx, fh1.time(), fh2.time())

        vals = raws
        chans = [None, None]
        tsamples = [None, None]
        isrs = [None, None]

        # prep the data (channelize, dedisperse, ...)

        for i, fh in enumerate(fhs):
            if rfi_filter_raw is not None:
                raws[i], ok = rfi_filter_raw(raws[i], nchan)

            if fh.telescope == 'aro':
                vals[i] = raws[i].astype(np.float32)
            else:
                vals[i] = raws[i]

            if dedisperse in ['coherent', 'by-channel']:
                fine = fh.thisfft(vals[i], axis=0, overwrite_x=True,
                                  **_fftargs)
                if fh.thisfft is rfft:
                    fine_cmplx = fine[1:-1].view(np.complex64)
                    # overwrites parts of fine, as intended
                    fine_cmplx *= fh.dd_coh
                else:
                    fine *= dd_coh
                vals[i] = fh.thisifft(fine, axis=0, overwrite_x=True,
                                      **_fftargs)

            if fh.nchan == 1:
                # ARO data should fall here
                chans[i] = fh.thisfft(vals[i].reshape(-1, nchan * 2), axis=-1,
                                      overwrite_x=True, **_fftargs)
            else:  # lofar and gmrt-phased are already channelised
                chans[i] = vals[i]

            # dedisperse on original (raw) time/freq grid
            # TODO: profile for speedup
            if dedisperse == 'incoherent':
                for ci, v in enumerate(fh.ndt):
                    chans[i][..., ci] = np.roll(chans[i][..., ci], v, axis=0)

            # average onto same time grid
            chans[i] = chans[i].reshape(Tf[i], chans[i].shape[0] / Tf[i], -1)\
                      .mean(axis=0)

            # average onto same freq grid
            chans[i] = chans[i][..., fkeep[i]]
            chans[i] = chans[i].reshape(-1, chans[i].shape[1] / NUf[i],
                                        NUf[i]).mean(axis=-1)

            # current sample positions in stream
            # (each averaged onto same time grid)
            isrs[i] = idx * rows[i] + np.arange(rows[i])
            tsamples[i] = (fh.this_nskip * fh.dtsample * fh.ntint(nchan)
                           + isrs[i] * fh.dtsample)
            tsamples[i] = tsamples[i].reshape(-1, Tf[i]).mean(axis=-1)

            # finally sort the channels low --> high (if necessary)
            # before x-correlating
            if fh.freqsort:
                # TODO: need to think about ordering
                chans[i] = chans[i][..., ::-1]

        # x-correlate
        xpower = chans[0] * chans[1].conjugate()

        if do_waterfall:
            # loop over corresponding positions in waterfall
            isr = idx * nrows + np.arange(nrows)
            for iw in xrange(isr[0] // ntw, isr[-1] // ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[0:xpower.shape[1], iw] += \
                        np.abs(np.sum(xpower[isr // ntw == iw], axis=0))

        if do_foldspec:
            # time since start (average of the two streams)
            # TODO: think about this: take one stream, both, average, ...
            #tsample = np.mean(tsamples, axis=0)
            tsample = np.array(tsamples)

            # timeseries already dedispersed
            phase = phasepol[0](tsample[0])
            iphase = np.remainder(phase * ngate,
                                  ngate).astype(np.int)

            # bin in the time series: 0..ntbin-1
            ibin = idx * ntbin // nt
            for k in xrange(nchans):  # equally xpower.shape[1]
                foldspec[k, :, ibin] += np.bincount(iphase,
                                                    np.abs(xpower[:, k]),
                                                    ngate)
                icount[k, :, ibin] += np.bincount(iphase,
                                                  np.abs(xpower[:, k]) != 0.,
                                                  ngate)

        if save_xcorr:
            curshape = dset.shape
            nx = max(nrows * (idx + 1), curshape[0])
            dset.resize((nx + nrows, curshape[1]))
            # TODO: h5py mpio stalls here... turn off save_xcorr for mpirun
            dset[nrows * idx: nrows * (idx + 1)] = xpower

        # read in next dataset if we haven't hit t1 yet
        for fh in [fh1, fh2]:
            if (fh.time() - t1).sec > 0.:
                endread = True

        if endread:
            break
        else:
            idx += size
            raws = [fh.seek_record_read((fh.this_nskip + idx * Rf[i])
                                        * fh.blocksize - fh.prop_delay,
                                        fh.blocksize * Rf[i])
                    for i, fh in enumerate(fhs)]
    if save_xcorr:
        fcorr.close()
    return foldspec, icount, waterfall
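
Before the two streams are cross-multiplied, correlate() averages each onto a common time and frequency grid with reshape-and-mean operations. The sketch below shows the same idea on toy complex arrays; it averages consecutive time samples (whereas the reshape above places the averaging factor on the leading axis), and the factors and shapes are made up.

import numpy as np

def average_onto_grid(chan, tf, nuf):
    # average a (ntime, nfreq) block over tf consecutive samples and nuf channels
    ntime, nfreq = chan.shape
    chan = chan.reshape(ntime // tf, tf, nfreq).mean(axis=1)
    return chan.reshape(-1, nfreq // nuf, nuf).mean(axis=-1)

# two toy channelized streams with different native resolutions
chan1 = (np.random.standard_normal((400, 32)) +
         1j * np.random.standard_normal((400, 32)))
chan2 = (np.random.standard_normal((200, 64)) +
         1j * np.random.standard_normal((200, 64)))
# bring both onto a common 200 x 32 grid, then cross-correlate
xpower = average_onto_grid(chan1, 2, 1) * average_onto_grid(chan2, 1, 2).conjugate()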
Example #6
def correlate(fh1,
              fh2,
              dm,
              nchan,
              ngate,
              ntbin,
              nt,
              ntw,
              dedisperse='incoherent',
              rfi_filter_raw=None,
              fref=_fref,
              save_xcorr=True,
              do_foldspec=True,
              phasepol=None,
              do_waterfall=True,
              t0=None,
              t1=None,
              comm=None,
              verbose=2):
    """
    fh1 : file handle of first data stream
    fh2 : file handle of second data stream
    dm : dispersion measure of the pulsar (to dedisperse to)
    nchan : number of frequency channels
    t0 : start time (isot) of the x-corr
         [None] start at common beginning of (fh1, fh2)
    t1 : end time (isot) of the x-corr
         [None] end at common ending of (fh1, fh2)
    comm : MPI communicator or None

    """
    fhs = [fh1, fh2]
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # find nearest starttime landing on same sample
    if t0 is None:
        t0 = max(fh1.time0, fh2.time0)
        print("Starting at %s" % t0)
    t0 = Time(t0, scale='utc')
    t1 = Time(t1, scale='utc')

    # find time offset between the two fh's, accommodating the relative phase
    # delay of the pulsar (the propagation delay)
    phases = [phasepol[i]((t0 - fhs[i].time0).sec) for i in [0, 1]]
    F0 = np.mean(
        [phasepol[i].deriv(1)((t0 - fhs[i].time0).sec) for i in [0, 1]])
    # propagation delay offset from fh1
    dts = [0. * u.s, np.diff(phases)[0] / F0 * u.s]
    if rank == 0:
        print("Will read fh2 ({0}) {1} ahead of fh1 ({2}) "
              "for propagation delay".format(fh2.telescope,
                                             dts[1].to(u.millisecond),
                                             fh1.telescope))

    # prep the fhs for xcorr stream, setting up channelization, dedispersion...
    for i, fh in enumerate(fhs):
        fh.seek(t0)
        # byte offset for propagation delay
        fh.prop_delay = int(round(dts[i] / fh.dtsample)) * fh.recordsize
        fh.dt1 = (1. / fh.samplerate).to(u.s)
        fh.this_nskip = fh.nskip(t0)
        if rank == 1:
            return None
        # set up FFT functions: real vs complex fft's
        if fh.nchan > 1:
            fh.thisfft = fft
            fh.thisifft = ifft
            fh.thisfftfreq = fftfreq
        else:
            fh.thisfft = rfft
            fh.thisifft = irfft
            fh.thisfftfreq = rfftfreq

        # pre-calculate time delay due to dispersion in coarse channels
        # LOFAR data is already channelized
        if fh.nchan > 1:
            fh.freq = fh.frequencies
        else:
            if fh.fedge_at_top:
                fh.freq = fh.fedge\
                    - fh.thisfftfreq(nchan * 2, fh.dt1.value) * u.Hz
            else:
                fh.freq = fh.fedge\
                    + fh.thisfftfreq(nchan * 2, fh.dt1.value) * u.Hz
            # [::2] sets frequency channels to numerical recipes ordering
            # or, rfft has an unusual ordering
            fh.freq = fh.freq[::2]

        # sort channels from low --> high frequency
        if np.diff(fh.freq.value).mean() < 0.:
            if rank == 0 and verbose > 1:
                print("Will frequency-sort {0} data before x-corr".format(
                    fh.telescope))
            fh.freqsort = True
        else:
            fh.freqsort = False

        fh.dt = (dispersion_delay_constant * dm *
                 (1. / fh.freq**2 - 1. / fref**2)).to(u.s).value

        # number of time bins to np.roll the channels for incoherent dedisperse
        if dedisperse == 'incoherent':
            fh.ndt = (fh.dt / fh.dtsample.to(u.s).value)
            fh.ndt = -1 * np.rint(fh.ndt).astype(np.int)

        elif dedisperse in ['coherent', 'by-channel']:
            # pre-calculate required turns due to dispersion
            if fh.nchan > 1:
                fcoh = (fh.freq[np.newaxis, :] + fftfreq(
                    fh.ntint(nchan), fh.dtsample.value)[:, np.newaxis] * u.Hz)
            else:
                if fh.fedge_at_top:
                    fcoh = fh.fedge - fh.thisfftfreq(
                        nchan * 2 * fh.ntint(nchan), fh.dt1.value) * u.Hz
                else:
                    fcoh = fh.fedge + fh.thisfftfreq(
                        nchan * 2 * fh.ntint(nchan), fh.dt1.value) * u.Hz

            #set frequency relative to which dispersion is coherently corrected
            if dedisperse == 'coherent':
                _fref = fref
            else:
                #fref = np.round((fcoh * fh.dtsample).to(1).value)/fh.dtsample
                _fref = np.repeat(fh.freq.value,
                                  fh.ntint(nchan)) * fh.freq.unit
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1. / _fref - 1. / fcoh)**2) * 360. * u.deg

            if fh.thisfftfreq is rfftfreq:
                # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
                # for 0 and n need only real part, but for 1...n-1 need real, imag
                # so just get shifts for r[1], r[2], ..., r[n-1]
                dang = dang.to(u.rad).value[1:-1:2]
            else:
                dang = dang.to(u.rad).value

            fh.dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)
    #### done fh setup ###

    ## xcorr setup
    # data-reading params (to help read in same-size time chunks and average
    # onto the same time-and-frequency grids)
    (Rf, Tf, NUf, fkeep, freqs, rows) = data_averaging_coeffs(fh1, fh2)
    nrows = int(min(rows[0] * Tf[1] / Tf[0], rows[1] * Tf[0] / Tf[1]))

    # summarize the (re)sampling
    if rank == 0:
        tmp = fh1.dtsample.to(
            u.s).value * fh1.blocksize / fh1.recordsize * Rf[0]
        print("\nReading {0} blocks of fh1, {1} blocks of fh2, "
              "for equal timeblocks of {2} sec ".format(Rf[0], Rf[1], tmp))

        if rank == 0 and verbose > 1:
            tmp = np.diff(freqs).mean()
            print("Averaging over {0} channels in fh1, {1} in fh2, for equal "
                  "frequency bins of {2} MHz".format(NUf[0], NUf[1], tmp))
            tmp = fh1.dtsample.to(u.s).value * Tf[0]
            print("Averaging over {0} timesteps in fh1, {1} in fh2, for equal "
                  "samples of {2} s\n".format(Tf[0], Tf[1], tmp))

        # check if we are averaging both fh's
        if rank == 0 and np.all(np.array(Tf) != 1):
            txt = "Note, averaging both fh's in time to have similar sample "\
                  "size. You may want to implement interpolation, or think "\
                  "more about this situation"
            print(txt)
        if rank == 0 and np.all(np.array(NUf) != 1):
            txt = "Note, averaging both fh's in freq to have similar sample "\
                  "size. You may want to implement interpolation, or think "\
                  "more about this situation"
            print(txt)

    # initialize the folded spectrum and waterfall
    nchans = min([len(fh.freq[fkeep[i]] / NUf[i]) for i, fh in enumerate(fhs)])
    foldspec = np.zeros((nchans, ngate, ntbin))
    icount = np.zeros((nchans, ngate, ntbin), dtype=np.int64)
    nwsize = min(nt * fh1.ntint(nchan) // ntw, nt * fh2.ntint(nchan) // ntw)
    waterfall = np.zeros((nchans, nwsize))

    if save_xcorr:
        # output dataset
        outname = "{0}{1}_{2}_{3}.hdf5".format(fh1.telescope[0],
                                               fh2.telescope[0], t0, t1)
        # mpi doesn't like colons
        outname = outname.replace(':', '')
        fcorr = h5py.File(outname, 'w')  # , driver='mpio', comm=comm)
        ## create the x-corr output file
        # save the frequency grids to help with future TODO: interpolate onto
        # same frequency grid. For now the frequencies fall within same bin
        if rank == 0 and verbose:
            print("Saving x-corr to %s\n" % outname)
        fcorr.create_dataset('freqs',
                             data=np.hstack([f.to(u.MHz).value
                                             for f in freqs]))

        # the x-corr data [tsteps, channels]
        dset = fcorr.create_dataset('corr', (nrows, freqs[0].size),
                                    dtype='complex64',
                                    maxshape=(None, nchan))
        dset.attrs.create('dedisperse', data=str(dedisperse))
        dset.attrs.create(
            'tsample',
            data=[fhs[i].dtsample.to(u.s).value * Tf[i] for i in [0, 1]])
        dset.attrs.create('chanbw', data=np.diff(freqs).mean())

    # start reading the data
    # this_nskip moves to 't0', rank is for MPI
    idx = rank
    raws = [
        fh.seek_record_read(
            (fh.this_nskip + idx * Rf[i]) * fh.blocksize - fh.prop_delay,
            fh.blocksize * Rf[i]) for i, fh in enumerate(fhs)
    ]
    endread = False
    print("read step (idx), fh1.time(), fh2.time() ")
    print("\t inclues {0} propagation delay".format(dts[1]))
    while np.all([raw.size > 0 for raw in raws]):
        if verbose:
            print("idx", idx, fh1.time(), fh2.time())

        vals = raws
        chans = [None, None]
        tsamples = [None, None]
        isrs = [None, None]

        # prep the data (channelize, dedisperse, ...)

        for i, fh in enumerate(fhs):
            if rfi_filter_raw is not None:
                raws[i], ok = rfi_filter_raw(raws[i], nchan)

            if fh.telescope == 'aro':
                vals[i] = raws[i].astype(np.float32)
            else:
                vals[i] = raws[i]

            if dedisperse in ['coherent', 'by-channel']:
                fine = fh.thisfft(vals[i],
                                  axis=0,
                                  overwrite_x=True,
                                  **_fftargs)
                if fh.thisfft is rfft:
                    fine_cmplx = fine[1:-1].view(np.complex64)
                    # overwrites parts of fine, as intended
                    fine_cmplx *= fh.dd_coh
                else:
                    fine *= dd_coh
                vals[i] = fh.thisifft(fine,
                                      axis=0,
                                      overwrite_x=True,
                                      **_fftargs)

            if fh.nchan == 1:
                # ARO data should fall here
                chans[i] = fh.thisfft(vals[i].reshape(-1, nchan * 2),
                                      axis=-1,
                                      overwrite_x=True,
                                      **_fftargs)
            else:  # lofar and gmrt-phased are already channelised
                chans[i] = vals[i]

            # dedisperse on original (raw) time/freq grid
            # TODO: profile for speedup
            if dedisperse == 'incoherent':
                for ci, v in enumerate(fh.ndt):
                    chans[i][..., ci] = np.roll(chans[i][..., ci], v, axis=0)

            # average onto same time grid
            chans[i] = chans[i].reshape(Tf[i], chans[i].shape[0] / Tf[i], -1)\
                      .mean(axis=0)

            # average onto same freq grid
            chans[i] = chans[i][..., fkeep[i]]
            chans[i] = chans[i].reshape(-1, chans[i].shape[1] / NUf[i],
                                        NUf[i]).mean(axis=-1)

            # current sample positions in stream
            # (each averaged onto same time grid)
            isrs[i] = idx * rows[i] + np.arange(rows[i])
            tsamples[i] = (fh.this_nskip * fh.dtsample * fh.ntint(nchan) +
                           isrs[i] * fh.dtsample)
            tsamples[i] = tsamples[i].reshape(-1, Tf[i]).mean(axis=-1)

            # finally sort the channels low --> high (if necessary)
            # before x-correlating
            if fh.freqsort:
                # TODO: need to think about ordering
                chans[i] = chans[i][..., ::-1]

        # x-correlate
        xpower = chans[0] * chans[1].conjugate()

        if do_waterfall:
            # loop over corresponding positions in waterfall
            isr = idx * nrows + np.arange(nrows)
            for iw in xrange(isr[0] // ntw, isr[-1] // ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[0:xpower.shape[1], iw] += \
                        np.abs(np.sum(xpower[isr // ntw == iw], axis=0))

        if do_foldspec:
            # time since start (average of the two streams)
            # TODO: think about this: take one stream, both, average, ...
            #tsample = np.mean(tsamples, axis=0)
            tsample = np.array(tsamples)

            # timeseries already dedispersed
            phase = phasepol[0](tsample[0])
            iphase = np.remainder(phase * ngate, ngate).astype(np.int)

            # bin in the time series: 0..ntbin-1
            ibin = idx * ntbin // nt
            for k in xrange(nchans):  # equally xpower.shape[1]
                foldspec[k, :, ibin] += np.bincount(iphase, np.abs(xpower[:,
                                                                          k]),
                                                    ngate)
                icount[k, :, ibin] += np.bincount(iphase,
                                                  np.abs(xpower[:, k]) != 0.,
                                                  ngate)

        if save_xcorr:
            curshape = dset.shape
            nx = max(nrows * (idx + 1), curshape[0])
            dset.resize((nx + nrows, curshape[1]))
            # TODO: h5py mpio stalls here... turn off save_xcorr for mpirun
            dset[nrows * idx:nrows * (idx + 1)] = xpower

        # read in next dataset if we haven't hit t1 yet
        for fh in [fh1, fh2]:
            if (fh.time() - t1).sec > 0.:
                endread = True

        if endread:
            break
        else:
            idx += size
            raws = [
                fh.seek_record_read(
                    (fh.this_nskip + idx * Rf[i]) * fh.blocksize -
                    fh.prop_delay, fh.blocksize * Rf[i])
                for i, fh in enumerate(fhs)
            ]
    if save_xcorr:
        fcorr.close()
    return foldspec, icount, waterfall
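
When save_xcorr is set, each chunk of cross-power is appended to a resizable HDF5 dataset. The h5py pattern is sketched below on made-up shapes; the file name, chunk loop and sizes are placeholders rather than the values used above.

import numpy as np
import h5py

nrows, nchan_out = 16, 32
with h5py.File('xcorr_sketch.hdf5', 'w') as fcorr:
    # maxshape=(None, ...) lets the time axis grow as chunks arrive
    dset = fcorr.create_dataset('corr', (nrows, nchan_out),
                                dtype='complex64', maxshape=(None, nchan_out))
    for idx in range(4):  # one iteration per read chunk
        xpower = (np.random.standard_normal((nrows, nchan_out)) +
                  1j * np.random.standard_normal((nrows, nchan_out))
                  ).astype(np.complex64)
        if nrows * (idx + 1) > dset.shape[0]:
            dset.resize((nrows * (idx + 1), nchan_out))
        dset[nrows * idx:nrows * (idx + 1)] = xpower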
Example #7
def fold(fh, comm, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, rfi_filter_raw=None, rfi_filter_power=None,
         return_fits=True):
    """
    FFT data, fold by phase/time and make a waterfall series

    Folding is done from the position the file is currently in

    Parameters
    ----------
    fh : file handle
        handle to file holding voltage timeseries
    comm: MPI communicator or None
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets in each file, each containing ntint samples;
        hence, the total number of samples is nt*ntint, with each sample
        containing a single polarisation
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        start of part of the file that is read (i.e., ignoring nhead)
    dedisperse : None or string (default: incoherent).
        None, 'incoherent', 'coherent', 'by-channel'.
        Note: None really does nothing
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or int
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    return_fits : bool (default: True)
        return a subint fits table for rank == 0 (None otherwise)

    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # initialize folded spectrum and waterfall
    foldspec = np.zeros((nchan, ngate, ntbin))
    icount = np.zeros((nchan, ngate, ntbin), dtype=np.int64)
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    if verbose and rank == 0:
        print('Reading from {}'.format(fh))

    nskip = fh.tell()/fh.blocksize
    if nskip > 0:
        if verbose and rank == 0:
            print('Starting {0} blocks = {1} bytes out from start.'
                  .format(nskip, nskip*fh.blocksize))

    dt1 = (1./fh.samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    if fh.telescope == 'lofar':
        dtsample = fh.dtsample
    else:
        dtsample = nchan * 2 * dt1
    tstart = dtsample * ntint * nskip

    # set up FFT functions: real vs complex fft's
    if fh.nchan > 1:
        thisfft = fft
        thisifft = ifft
        thisfftfreq = fftfreq
    else:
        thisfft = rfft
        thisifft = irfft
        thisfftfreq = rfftfreq

    # pre-calculate time delay due to dispersion in coarse channels
    # LOFAR data is already channelized
    if fh.nchan > 1:
        freq = fh.frequencies
    else:
        if fedge_at_top:
            freq = fedge - thisfftfreq(nchan*2, dt1.value) * u.Hz
        else:
            freq = fedge + thisfftfreq(nchan*2, dt1.value) * u.Hz
        # sort lowest to highest freq
        # freq.sort()
        # [::2] sets frequency channels to numerical recipes ordering
        # or, rfft has an unusual ordering
        freq = freq[::2]

    dt = (dispersion_delay_constant * dm *
          (1./freq**2 - 1./fref**2)).to(u.s).value

    if dedisperse in ['coherent', 'by-channel']:
        # pre-calculate required turns due to dispersion
        if fh.nchan > 1:
            fcoh = (freq[np.newaxis,:] +
                    fftfreq(ntint, dtsample.value)[:,np.newaxis] * u.Hz)
        else:
            if fedge_at_top:
                fcoh = fedge - thisfftfreq(nchan*2*ntint, dt1.value) * u.Hz
            else:
                fcoh = fedge + thisfftfreq(nchan*2*ntint, dt1.value) * u.Hz

        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            # _fref = np.round((fcoh * dtsample).to(1).value) / dtsample
            _fref = np.repeat(freq.value, ntint) * freq.unit
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy)
        dang = (dispersion_delay_constant * dm * fcoh *
                (1./_fref-1./fcoh)**2) * 360. * u.deg

        if thisfftfreq is rfftfreq:
            # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
            # for 0 and n need only real part, but for 1...n-1 need real, imag
            # so just get shifts for r[1], r[2], ..., r[n-1]
            dang = dang.to(u.rad).value[1:-1:2]
        else:
            dang = dang.to(u.rad).value

        dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

    for j in xrange(rank, nt, size):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j+1, nt, (tstart+dtsample*j*ntint).value))  # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            # ARO/GMRT return int-stream,
            # LOFAR returns complex64 (count/nchan, nchan)
            # LOFAR "combined" file class can do lots of seeks, we minimize
            # that with the 'seek_record_read' routine
            raw = fh.seek_record_read((nskip+j)*fh.blocksize, fh.blocksize)
        except(EOFError, IOError) as exc:
            print("Hit {0!r}; writing pgm's".format(exc))
            break
        if verbose >= 2:
            print("Read {} items".format(raw.size), end="")

        if rfi_filter_raw is not None:
            raw, ok = rfi_filter_raw(raw, nchan)
            if verbose >= 2:
                print("... raw RFI (zap {0}/{1})"
                      .format(np.count_nonzero(~ok), ok.size), end="")

        if fh.telescope == 'aro':
            vals = raw.astype(np.float32)
        else:
            vals = raw

        # TODO: for coherent dedispersion, need to undo existing channels
        # for lofar and gmrt-phased
        if dedisperse in ['coherent', 'by-channel']:
            fine = thisfft(vals, axis=0, overwrite_x=True, **_fftargs)
            if thisfft is rfft:
                fine_cmplx = fine[1:-1].view(np.complex64)
                fine_cmplx *= dd_coh  # overwrites parts of fine, as intended
            else:
                fine *= dd_coh

            vals = thisifft(fine, axis=0, overwrite_x=True, **_fftargs)
            if verbose >= 2:
                print("... dedispersed", end="")

        if fh.nchan == 1:
            chan2 = thisfft(vals.reshape(-1, nchan*2), axis=-1,
                            overwrite_x=True, **_fftargs)**2
            # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
            # re-order to Num.Rec. format: Re[0], Re[n/2], Re[1], ....
            power = np.hstack((chan2[:,:1]+chan2[:,-1:],
                               chan2[:,1:-1].reshape(-1,nchan-1,2).sum(-1)))
        else:  # lofar and gmrt-phased are already channelised
            power = vals.real**2 + vals.imag**2

        if verbose >= 2:
            print("... power", end="")

        if rfi_filter_power is not None:
            power = rfi_filter_power(power)
            print("... power RFI", end="")

        # current sample positions in stream
        isr = j*ntint + np.arange(ntint)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in xrange(isr[0]//ntw, isr[-1]//ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:,iw] += np.sum(power[isr//ntw == iw],
                                              axis=0)
            if verbose >= 2:
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr*dtsample).value  # times since start
            ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1

            for k in xrange(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                elif dedisperse in ['incoherent', 'by-channel']:
                    t = tsample - dt[k]  # dedispersed times
                elif dedisperse is None:
                    t = tsample  # do nothing
                else:
                    t = tsample - dt[k]

                phase = phasepol(t)  # corresponding PSR phases
                iphase = np.remainder(phase*ngate,
                                      ngate).astype(np.int)
                # sum and count samples by phase bin
                foldspec[k, :, ibin] += np.bincount(iphase, power[:, k], ngate)
                icount[k, :, ibin] += np.bincount(iphase, power[:, k] != 0.,
                                                  ngate)

            if verbose >= 2:
                print("... folded", end="")

        if verbose >= 2:
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    if return_fits and rank == 0:
        # subint HDU
        # update table columns
        # TODO: allow multiple polarizations
        npol = 1
        newcols = []
        # FITS table creation difficulties...
        # assign data *after* 'new_table' creation
        array2assign = {}
        tsubint = ntint*dtsample
        for col in fh['subint'].columns:
            attrs = col.copy().__dict__
            # remove non-init args
            for nn in ['_pseudo_unsigned_ints', '_dims', '_physical_values',
                       'dtype', '_phantom', 'array']:
                attrs.pop(nn, None)

            if col.name == 'TSUBINT':
                array2assign[col.name] = np.array(tsubint)
            elif col.name == 'OFFS_SUB':
                array2assign[col.name] = np.arange(ntbin) * tsubint
            elif col.name == 'DAT_FREQ':
                # TODO: sort from lowest freq. to highest
                # ('DATA') needs sorting as well
                array2assign[col.name] = freq.to(u.MHz).value.astype(np.double)
                attrs['format'] = '{0}D'.format(freq.size)
            elif col.name == 'DAT_WTS':
                array2assign[col.name] = np.ones(freq.size, dtype=np.float32)
                attrs['format'] = '{0}E'.format(freq.size)
            elif col.name == 'DAT_OFFS':
                array2assign[col.name] = np.zeros(freq.size*npol,
                                                  dtype=np.float32)
                attrs['format'] = '{0}E'.format(freq.size*npol)
            elif col.name == 'DAT_SCL':
                array2assign[col.name] = np.ones(freq.size*npol,
                                                 dtype=np.float32)
                attrs['format'] = '{0}E'.format(freq.size)
            elif col.name == 'DATA':
                array2assign[col.name] = np.zeros((ntbin, npol, freq.size,
                                                   ngate), dtype='i1')
                attrs['dim'] = "({},{},{})".format(ngate, freq.size, npol)
                attrs['format'] = "{0}I".format(ngate*freq.size*npol)
            newcols.append(FITS.Column(**attrs))
        newcoldefs = FITS.ColDefs(newcols)

        oheader = fh['SUBINT'].header.copy()
        newtable = FITS.new_table(newcoldefs, nrows=ntbin, header=oheader)
        # update the 'subint' header and create a new one to be returned
        # owing to the structure of the code (MPI), we need to assign
        # the 'DATA' outside of fold.py
        newtable.header.update('NPOL', 1)
        newtable.header.update('NBIN', ngate)
        newtable.header.update('NBIN_PRD', ngate)
        newtable.header.update('NCHAN', freq.size)
        newtable.header.update('INT_UNIT', 'PHS')
        newtable.header.update('TBIN', tsubint.to(u.s).value)
        chan_bw = np.abs(np.diff(freq.to(u.MHz).value).mean())
        newtable.header.update('CHAN_BW', chan_bw)
        if dedisperse in ['coherent', 'by-channel', 'incoherent']:
            newtable.header.update('DM', dm.value)
        # finally assign the table data
        for name, array in array2assign.items():
            try:
                newtable.data.field(name)[:] = array
            except ValueError:
                print("FITS error... work in progress",
                      name, array.shape, newtable.data.field(name)[:].shape)

        phdu = fh['PRIMARY'].copy()
        subinttable = FITS.HDUList([phdu, newtable])
        subinttable[1].header.update('EXTNAME', 'SUBINT')
        subinttable['PRIMARY'].header.update('DATE-OBS', fh.time0.isot)
        subinttable['PRIMARY'].header.update('STT_IMJD', int(fh.time0.mjd))
        subinttable['PRIMARY'].header.update(
            'STT_SMJD', int((fh.time0.mjd - int(fh.time0.mjd)) * 86400))
    else:
        subinttable = FITS.HDUList([])

    return foldspec, icount, waterfall, subinttable
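The subint table above is built with the old pyfits-style FITS.new_table interface. As a rough, self-contained sketch only (made-up shapes and values, using the current astropy.io.fits API rather than the calls above), an equivalent SUBINT-style table could be assembled like this:

import numpy as np
from astropy.io import fits

# Hedged sketch: a minimal 'SUBINT'-style binary table with astropy.io.fits.
# All sizes and values below are illustrative, not taken from the code above.
ntbin, nchan, ngate, npol = 8, 16, 32, 1
tsubint = 10.0  # assumed subintegration length in seconds

cols = [
    fits.Column(name='TSUBINT', format='1D', array=np.full(ntbin, tsubint)),
    fits.Column(name='OFFS_SUB', format='1D', array=np.arange(ntbin) * tsubint),
    fits.Column(name='DAT_FREQ', format='{0}D'.format(nchan),
                array=np.tile(np.linspace(310., 326., nchan), (ntbin, 1))),
    fits.Column(name='DAT_WTS', format='{0}E'.format(nchan),
                array=np.ones((ntbin, nchan), dtype=np.float32)),
    fits.Column(name='DATA', format='{0}I'.format(ngate * nchan * npol),
                dim='({0},{1},{2})'.format(ngate, nchan, npol),
                array=np.zeros((ntbin, npol, nchan, ngate), dtype='i2')),
]
subint = fits.BinTableHDU.from_columns(cols)
subint.header['EXTNAME'] = 'SUBINT'
subint.header['NPOL'] = npol
subint.header['NBIN'] = ngate
subint.header['NCHAN'] = nchan
hdul = fits.HDUList([fits.PrimaryHDU(), subint])
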
Exemple #8
0
def fold(fh, comm, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, rfi_filter_raw=None, rfi_filter_power=None,
         return_fits=False):
    """
    FFT data, fold by phase/time and make a waterfall series

    Folding is done from the position the file is currently in

    Parameters
    ----------
    fh : file handle
        handle to file holding voltage timeseries
    comm: MPI communicator or None
        will use size, rank attributes
    samplerate : Quantity
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        start of the file that is read.
    dedisperse : None or string (default: incoherent).
        None, 'incoherent', 'coherent', 'by-channel'.
        Note: None really does nothing
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or int
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    return_fits : bool (default: False)
        return a subint fits table for rank == 0 (None otherwise)

    """
    assert dedisperse in (None, 'incoherent', 'by-channel', 'coherent')
    need_fine_channels = dedisperse in ['by-channel', 'coherent']
    assert nchan % fh.nchan == 0
    if dedisperse == 'by-channel' and fh.nchan > 1:
        oversample = nchan // fh.nchan
        assert ntint % oversample == 0
    else:
        oversample = 1

    if dedisperse == 'coherent' and fh.nchan > 1:
        warnings.warn("Doing coherent dedispersion on channelized data. "
                      "May get artefacts!")

    if comm is None:
        mpi_rank = 0
        mpi_size = 1
    else:
        mpi_rank = comm.rank
        mpi_size = comm.size

    npol = getattr(fh, 'npol', 1)
    assert npol == 1 or npol == 2
    if verbose > 1 and mpi_rank == 0:
        print("Number of polarisations={}".format(npol))

    # initialize folded spectrum and waterfall
    # TODO: use estimated number of points to set dtype
    if do_foldspec:
        foldspec = np.zeros((ntbin, nchan, ngate, npol**2), dtype=np.float32)
        icount = np.zeros((ntbin, nchan, ngate), dtype=np.int32)
    else:
        foldspec = None
        icount = None

    if do_waterfall:
        nwsize = nt*ntint//ntw//oversample
        waterfall = np.zeros((nwsize, nchan, npol**2), dtype=np.float64)
    else:
        waterfall = None

    if verbose and mpi_rank == 0:
        print('Reading from {}'.format(fh))

    nskip = fh.tell() // fh.blocksize
    if nskip > 0:
        if verbose and mpi_rank == 0:
            print('Starting {0} blocks = {1} bytes out from start.'
                  .format(nskip, nskip*fh.blocksize))

    dt1 = (1./samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    if fh.telescope == 'lofar':
        dtsample = fh.dtsample
    else:
        dtsample = nchan // oversample * 2 * dt1
    tstart = dtsample * ntint * nskip

    # pre-calculate time delay due to dispersion in coarse channels
    # for channelized data, frequencies are known

    tb = -1. if fedge_at_top else +1.
    if fh.nchan == 1:
        if getattr(fh, 'data_is_complex', False):
            # for complex data, really each complex sample consists of
            # 2 real ones, so multiply dt1 by 2.
            freq = fedge + tb * fftfreq(nchan, 2.*dt1.value) * u.Hz
            if dedisperse == 'coherent':
                fcoh = fedge + tb * fftfreq(nchan*ntint, 2.*dt1.value) * u.Hz
                fcoh.shape = (-1, 1)
            elif dedisperse == 'by-channel':
                fcoh = freq + (tb * fftfreq(
                    ntint, 2.*dtsample.value) * u.Hz)[:, np.newaxis]
        else:
            freq = fedge + tb * rfftfreq(nchan*2, dt1.value)[::2] * u.Hz
            if dedisperse == 'coherent':
                fcoh = fedge + tb * rfftfreq(nchan*ntint*2,
                                             dt1.value)[::2] * u.Hz
                fcoh.shape = (-1, 1)
            elif dedisperse == 'by-channel':
                fcoh = freq + tb * fftfreq(
                    ntint, dtsample.value)[:, np.newaxis] * u.Hz
        freq_in = freq
    else:
        # input frequencies may not be the ones going out
        freq_in = fh.frequencies
        if oversample == 1:
            freq = freq_in
        else:
            freq = (freq_in[:, np.newaxis] + tb * u.Hz *
                    rfftfreq(oversample*2, dtsample.value/2.)[::2])
        # same as fine = rfftfreq(2*ntint, dtsample.value/2.)[::2]
        fcoh = freq_in[np.newaxis, :] + tb * u.Hz * rfftfreq(
            ntint*2, dtsample.value/2.)[::2, np.newaxis]
        # print('fedge_at_top={0}, tb={1}'.format(fedge_at_top, tb))
    ifreq = freq.ravel().argsort()

    # pre-calculate time offsets in (input) channelized streams
    dt = dispersion_delay_constant * dm * (1./freq_in**2 - 1./fref**2)

    if need_fine_channels:
        # pre-calculate required turns due to dispersion.
        #
        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            _fref = freq_in[np.newaxis, :]
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy)
        dang = (dispersion_delay_constant * dm * fcoh *
                (1./_fref-1./fcoh)**2) * u.cycle
        with u.set_enabled_equivalencies(u.dimensionless_angles()):
            dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

        # add dimension for polarisation
        dd_coh = dd_coh[..., np.newaxis]

    # Calculate the part of the whole file this node should handle.
    size_per_node = (nt-1)//mpi_size + 1
    start_block = mpi_rank*size_per_node
    end_block = min((mpi_rank+1)*size_per_node, nt)
    for j in range(start_block, end_block):
        if verbose and j % progress_interval == 0:
            print('#{:4d}/{:4d} is doing {:6d}/{:6d} [={:6d}/{:6d}]; '
                  'time={:18.12f}'
                  .format(mpi_rank, mpi_size, j+1, nt,
                          j-start_block+1, end_block-start_block,
                          (tstart+dtsample*j*ntint).value))  # time since start

        # Just in case numbers were set wrong -- break if file ends;
        # better keep at least the work done.
        try:
            raw = fh.seek_record_read(int((nskip+j)*fh.blocksize),
                                      fh.blocksize)
        except(EOFError, IOError) as exc:
            print("Hit {0!r}; writing data collected.".format(exc))
            break
        if verbose >= 2:
            print("#{:4d}/{:4d} read {} items"
                  .format(mpi_rank, mpi_size, raw.size), end="")

        if npol == 2:  # multiple polarisations
            raw = raw.view(list(raw.dtype.fields.values())[0][0])

        if fh.nchan == 1:  # raw.shape=(ntint*npol)
            raw = raw.reshape(-1, npol)
        else:              # raw.shape=(ntint, nchan*npol)
            raw = raw.reshape(-1, fh.nchan, npol)

        if rfi_filter_raw is not None:
            raw, ok = rfi_filter_raw(raw)
            if verbose >= 2:
                print("... raw RFI (zap {0}/{1})"
                      .format(np.count_nonzero(~ok), ok.size), end="")

        if np.can_cast(raw.dtype, np.float32):
            vals = raw.astype(np.float32)
        else:
            assert raw.dtype.kind == 'c'
            vals = raw

        if fh.nchan == 1:
            # have real-valued time stream of complex baseband
            # if we need coherent dedispersion, do the FT of the whole thing,
            # otherwise FT to output channels
            if raw.dtype.kind == 'c':
                ftchan = len(vals) if dedisperse == 'coherent' else nchan
                vals = fft(vals.reshape(-1, ftchan, npol), axis=1,
                           overwrite_x=True, **_fftargs)
            else:  # real data
                ftchan = len(vals) // 2 if dedisperse == 'coherent' else nchan
                vals = rfft(vals.reshape(-1, ftchan*2, npol), axis=1,
                            overwrite_x=True, **_fftargs)
                if vals.dtype.kind == 'f':  # this depends on version, sigh.
                    # rfft: Re[0], Re[1], Im[1],.,Re[n/2-1], Im[n/2-1], Re[n/2]
                    # re-order to normal fft format (like Numerical Recipes):
                    # Re[0], Re[n], Re[1], Im[1], .... (channel 0 junk anyway)
                    vals = (np.hstack((vals[:, :1], vals[:, -1:],
                                       vals[:, 1:-1]))
                            .reshape(-1, ftchan, 2 * npol))
                    if npol == 2:  # reorder pol & real/imag
                        vals1 = vals[:, :, 1]
                        vals[:, :, 1] = vals[:, :, 2]
                        vals[:, :, 2] = vals1
                        vals = vals.reshape(-1, ftchan, npol, 2)
                else:
                    vals[:, 0] = vals[:, 0].real + 1j * vals[:, -1].real
                    vals = vals[:, :-1]

                vals = vals.view(np.complex64).reshape(-1, ftchan, npol)

            # for incoherent,            vals.shape=(ntint, nchan, npol)
            # for others, (1, ntint*nchan, npol) -> (ntint*nchan, 1, npol)
            if need_fine_channels:
                if dedisperse == 'by-channel':
                    fine = fft(vals, axis=0, overwrite_x=True, **_fftargs)
                else:
                    fine = vals.reshape(-1, 1, npol)

        else:  # data already channelized
            if need_fine_channels:
                fine = fft(vals, axis=0, overwrite_x=True, **_fftargs)
                # have fine.shape=(ntint, fh.nchan, npol)

        if need_fine_channels:
            # Dedisperse.
            fine *= dd_coh

            # if dedisperse == 'by-channel' and oversample > 1:
                # fine.shape=(ntint*oversample, chan_in, npol)
                #           =(coarse,fine,fh.chan, npol)
                #  -> reshape(oversample, ntint, fh.nchan, npol)
                # want (ntint=fine, fh.nchan, oversample, npol) -> .transpose
                # fine = (fine.reshape(nchan / fh.nchan, -1, fh.nchan, npol)
                #         .transpose(1, 2, 0, 3)
                #         .reshape(-1, nchan, npol))

            # now fine.shape=(ntint, nchan, npol)  w/ nchan=1 for coherent
            vals = ifft(fine, axis=0, overwrite_x=True, **_fftargs)

            if dedisperse == 'coherent' and nchan > 1 and fh.nchan == 1:
                # final FT to get requested channels
                vals = vals.reshape(-1, nchan, npol)
                vals = fft(vals, axis=1, overwrite_x=True, **_fftargs)
            elif dedisperse == 'by-channel' and oversample > 1:
                vals = vals.reshape(-1, oversample, fh.nchan, npol)
                vals = fft(vals, axis=1, overwrite_x=True, **_fftargs)
                vals = vals.transpose(0, 2, 1, 3).reshape(-1, nchan, npol)

            # vals[time, chan, pol]
            if verbose >= 2:
                print("... dedispersed", end="")

        if npol == 1:
            power = vals.real**2 + vals.imag**2
        else:
            p0 = vals[..., 0]
            p1 = vals[..., 1]
            power = np.empty(vals.shape[:-1] + (4,), np.float32)
            power[..., 0] = p0.real**2 + p0.imag**2
            power[..., 1] = p0.real*p1.real + p0.imag*p1.imag
            power[..., 2] = p0.imag*p1.real - p0.real*p1.imag
            power[..., 3] = p1.real**2 + p1.imag**2

        if verbose >= 2:
            print("... power", end="")

        # current sample positions and corresponding time in stream
        isr = j*(ntint // oversample) + np.arange(ntint // oversample)
        tsr = (isr*dtsample*oversample)[:, np.newaxis]

        if rfi_filter_power is not None:
            power = rfi_filter_power(power, tsr.squeeze())
            print("... power RFI", end="")

        # correct for delay if needed
        if dedisperse in ['incoherent', 'by-channel']:
            # tsample.shape=(ntint/oversample, nchan_in)
            tsr = tsr - dt

        if do_waterfall:
            # # loop over corresponding positions in waterfall
            # for iw in xrange(isr[0]//ntw, isr[-1]//ntw + 1):
            #     if iw < nwsize:  # add sum of corresponding samples
            #         waterfall[iw, :] += np.sum(power[isr//ntw == iw],
            #                                    axis=0)[ifreq]
            iw = np.round((tsr / dtsample / oversample).to(1)
                          .value / ntw).astype(int)
            for k, kfreq in enumerate(ifreq):  # sort in frequency while at it
                iwk = iw[:, (0 if iw.shape[1] == 1 else kfreq // oversample)]
                iwk = np.clip(iwk, 0, nwsize-1, out=iwk)
                iwkmin = iwk.min()
                iwkmax = iwk.max()+1
                for ipow in range(npol**2):
                    waterfall[iwkmin:iwkmax, k, ipow] += np.bincount(
                        iwk-iwkmin, power[:, kfreq, ipow], iwkmax-iwkmin)
            if verbose >= 2:
                print("... waterfall", end="")

        if do_foldspec:
            ibin = (j*ntbin) // nt  # bin in the time series: 0..ntbin-1

            # times and cycles since start time of observation.
            tsample = tstart + tsr
            phase = (phasepol(tsample.to(u.s).value.ravel())
                     .reshape(tsample.shape))
            # corresponding PSR phases
            iphase = np.remainder(phase*ngate, ngate).astype(int)

            for k, kfreq in enumerate(ifreq):  # sort in frequency while at it
                iph = iphase[:, (0 if iphase.shape[1] == 1
                                 else kfreq // oversample)]
                # sum and count samples by phase bin
                for ipow in range(npol**2):
                    foldspec[ibin, k, :, ipow] += np.bincount(
                        iph, power[:, kfreq, ipow], ngate)
                icount[ibin, k, :] += np.bincount(
                    iph, power[:, kfreq, 0] != 0., ngate)

            if verbose >= 2:
                print("... folded", end="")

        if verbose >= 2:
            print("... done")

    # Commented out as a workaround: this was causing "referenced before
    # assignment" errors with JB data.
    # if verbose >= 2 or verbose and mpi_rank == 0:
    #     print('#{:4d}/{:4d} read {:6d} out of {:6d}'
    #           .format(mpi_rank, mpi_size, j+1, nt))

    if npol == 1:
        if do_foldspec:
            foldspec = foldspec.reshape(foldspec.shape[:-1])
        if do_waterfall:
            waterfall = waterfall.reshape(waterfall.shape[:-1])

    return foldspec, icount, waterfall
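The dd_coh factor computed in the fold() above encodes the dispersive phase rotation per fine channel (cf. eq. 5.21 and following in Lorimer & Kramer). A standalone numerical sketch, with assumed values for the dispersion constant, DM and frequencies (none taken from the code above):

import numpy as np
import astropy.units as u

# Illustrative sketch of the coherent-dedispersion phase factor; all numbers assumed.
dispersion_delay_constant = 4149. * u.s * u.MHz**2 * u.cm**3 / u.pc
dm = 26.8 * u.pc / u.cm**3                       # assumed dispersion measure
fref = 325. * u.MHz                              # frequency dedispersed to
fcoh = np.linspace(310., 326., 2**12) * u.MHz    # fine-channel frequencies

# dispersive phase (in turns) at each fine channel relative to fref
dang = (dispersion_delay_constant * dm * fcoh *
        (1. / fref - 1. / fcoh)**2) * u.cycle
dd_coh = np.exp(-1j * dang.to(u.rad).value).astype(np.complex64)
# multiplying the fine-channel spectrum by dd_coh removes this phase ramp
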
Exemple #9
0
def fold(file1, samplerate, fmid, nchan,
         nt, ntint, nhead, ngate, ntbin, ntw, dm, fref, phasepol,
         coherent=False, do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100):
    """FFT Effelsberg data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    file1 : string
        name of the file holding voltage timeseries
    samplerate : float
        rate at which samples were originally taken and thus band width
        (frequency units)
    fmid : float
        mid point of the frequency band (frequency units)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*(2*ntint), with each sample containing
        real,imag for two polarisations
    nhead : int
        number of bytes to skip before reading (usually 4096 for Effelsberg)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        start of part of the file that is read (i.e., ignoring nhead)
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    """

    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (each nchan contains 4 bytes:
    # real,imag for 2 polarisations).
    recsize = 4*nchan*ntint
    if verbose:
        print('Reading from {}'.format(file1))

    myopen = gzip.open if '.gz' in file1 else open
    with myopen(file1, 'rb', recsize) as fh1:

        if nhead > 0:
            if verbose:
                print('Skipping {0} bytes'.format(nhead))
            fh1.seek(nhead)

        foldspec = np.zeros((nchan, ngate))
        icount = np.zeros((nchan, ngate))

        # gosh, fftpack has everything; used to calculate with:
        # fband / nchan * (np.mod(np.arange(nchan)+nchan/2, nchan)-nchan/2)
        if coherent:
            # pre-calculate required turns due to dispersion
            fcoh = (fmid +
                    fftfreq(nchan*ntint, (1./samplerate).to(u.s).value) * u.Hz)
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1./fref-1./fcoh)**2) * 360. * u.deg
            dedisperse = np.exp(dang.to(u.rad).value * 1j
                                ).conj().astype(np.complex64)
        else:
            # pre-calculate time delay due to dispersion
            freq = fmid + fftfreq(nchan, (1./samplerate).to(u.s).value) * u.Hz
            dt = (dispersion_delay_constant * dm *
                  (1./freq**2 - 1./fref**2)).to(u.s).value

        dtsample = (nchan/samplerate).to(u.s).value

        for j in range(nt):
            if verbose and (j+1) % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j+1, nt, dtsample*j*ntint))   # equivalent time since start

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data stored as series of two two-byte complex numbers,
                # one for each polarization
                raw = np.frombuffer(fh1.read(recsize),
                                    dtype=np.int8).reshape(-1,2,2)
            except Exception:
                break

            # use view for fast conversion from float to complex
            vals = raw.astype(np.float32).view(np.complex64).squeeze()
            # vals[i_int * i_block, i_pol]
            if coherent:
                fine = fft(vals, axis=0, overwrite_x=True, **_fftargs)
                fine *= dedisperse[:,np.newaxis]
                vals = ifft(fine, axis=0, overwrite_x=True, **_fftargs)

            chan = fft(vals.reshape(-1, nchan, 2), axis=1, overwrite_x=True,
                       **_fftargs)
            # chan[i_int, i_block, i_pol]
            power = np.sum(chan.real**2+chan.imag**2, axis=-1)

            # current sample positions in stream
            isr = j*ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in range(isr[0]//ntw, isr[-1]//ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:,iw] += np.sum(power[isr//ntw == iw],
                                                  axis=0)

            if do_foldspec:
                tsample = dtsample*isr  # times since start

                for k in range(nchan):
                    if coherent:
                        t = tsample  # already dedispersed
                    else:
                        t = tsample - dt[k]  # dedispersed times
                    phase = phasepol(t)  # corresponding PSR phases
                    iphase = np.remainder(phase*ngate,
                                          ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k] += np.bincount(iphase, power[:,k], ngate)
                    icount[k] += np.bincount(iphase, None, ngate)

                ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1
                if (j+1)*ntbin//nt > ibin:  # last addition to bin?
                    # get normalised flux in each bin (where any were added)
                    nonzero = icount > 0
                    nfoldspec = np.where(nonzero, foldspec/icount, 0.)
                    # subtract phase average and store
                    nfoldspec -= np.where(nonzero,
                                          np.sum(nfoldspec, 1, keepdims=True) /
                                          np.sum(nonzero, 1, keepdims=True), 0)
                    foldspec2[:,:,ibin] = nfoldspec
                    # reset for next iteration
                    foldspec *= 0
                    icount *= 0

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    if do_foldspec:
        # swap two halves in frequency, so that freq increases monotonically
        foldspec2 = fftshift(foldspec2, axes=0)

    if do_waterfall:
        nonzero = waterfall != 0.  # bins that actually received data
        waterfall -= np.where(nonzero,
                              np.sum(waterfall, 1, keepdims=True) /
                              np.sum(nonzero, 1, keepdims=True), 0.)
        # swap two halves in frequency, so that freq increases monotonically
        waterfall = fftshift(waterfall, axes=0)

    return foldspec2, waterfall
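The inner folding loop above comes down to a couple of numpy calls. A minimal, self-contained sketch of the bincount-based folding with synthetic data (the sample time, gate count and stand-in phase polynomial are all assumptions, not values from the code above):

import numpy as np
from numpy.polynomial import Polynomial

# Sketch of bincount-based phase folding for a single frequency channel.
ngate = 32                                    # number of phase gates
dtsample = 1.25e-4                            # assumed sample time (s)
power = np.random.rand(20000)                 # synthetic detected power
tsample = np.arange(power.size) * dtsample    # sample times since start
phasepol = Polynomial([0.3, 29.95])           # stand-in: phase = 0.3 + 29.95 * t

phase = phasepol(tsample)                     # pulsar phase at each sample
iphase = np.remainder(phase * ngate, ngate).astype(int)
profile = np.bincount(iphase, weights=power, minlength=ngate)
counts = np.bincount(iphase, minlength=ngate)
# mean power per phase gate, where any samples were added
norm = np.where(counts > 0, profile / np.maximum(counts, 1), 0.)
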
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, comm=None):
    """FFT GMRT data, fold by phase/time and make a waterfall series

    Parameters
    ----------
    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : numpy dtype or '4bit' or '1bit'
        way the data are stored in the file
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nskip : int
        number of records (nchan*ntint*2 for phased data w/ np.int8 real,imag)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        start of part of the file that is read (i.e., ignoring nhead)
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel'
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    comm : MPI communicator (default: None)
    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (simple for ARO: 1 byte/sample)
    # double since we need to get ntint samples after FFT
    itemsize = {np.int8: 2}[dtype]
    recsize = nchan*ntint*itemsize

    if verbose:
        print('Reading from {}'.format(fh1))

    if nskip > 0:
        if verbose:
            print('Skipping {0} {1}-byte records'.format(nskip, recsize))
        if size == 1:
            fh1.seek(nskip * recsize)

    foldspec = np.zeros((nchan, ngate, ntbin), dtype=int)
    icount = np.zeros((nchan, ngate, ntbin), dtype=int)

    dt1 = (1./samplerate).to(u.s)
    # but include 2*nchan real-valued samples used for each FFT
    # (or, equivalently, real and imag for channels)
    dtsample = nchan * 2 * dt1
    tstart = dt1 * nskip * recsize

    # pre-calculate time delay due to dispersion in coarse channels
    freq = fftshift(fftfreq(nchan, 2.*dt1.value)) * u.Hz
    freq = (fedge - (freq-freq[0])
            if fedge_at_top
            else fedge + (freq-freq[0]))
    # [::2] sets frequency channels to numerical recipes ordering
    dt = (dispersion_delay_constant * dm *
          (1./freq**2 - 1./fref**2)).to(u.s).value
    # if dedisperse in {'coherent', 'by-channel'}:
    #     # pre-calculate required turns due to dispersion
    #     fcoh = (fedge - fftfreq(nchan*ntint, 2.*dt1)
    #             if fedge_at_top
    #             else
    #             fedge + fftfreq(nchan*ntint, 2.*dt1))
    #     # set frequency relative to which dispersion is coherently corrected
    #     if dedisperse == 'coherent':
    #         _fref = fref
    #     else:
    #         _fref = np.repeat(freq.value, ntint) * freq.unit
    #     # (check via eq. 5.21 and following in
    #     # Lorimer & Kramer, Handbook of Pulsar Astronomy)
    #     dang = (dispersion_delay_constant * dm * fcoh *
    #             (1./_fref-1./fcoh)**2) * 360. * u.deg
    #     # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
    #     # for 0 and n need only real part, but for 1...n-1 need real, imag
    #     # so just get shifts for r[1], r[2], ..., r[n-1]
    #     dang = dang.to(u.rad).value[1:-1:2]
    #     dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

    for j in range(rank, nt, size):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j+1, nt, (tstart+dtsample*j*ntint).value))   # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            if size > 1:
                fh1.seek((nskip + j) * recsize)
            # data just a series of byte pairs, of real and imag
            raw = fromfile(fh1, dtype, recsize)
        except(EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        vals = raw.astype(np.float32).view(np.complex64).squeeze()
        # if dedisperse in {'coherent', 'by-channel'}:
        #     fine = rfft(vals, axis=0, overwrite_x=True, **_fftargs)
        #     fine_cmplx = fine[1:-1].view(np.complex64)
        #     fine_cmplx *= dd_coh  # this overwrites parts of fine, as intended
        #     vals = irfft(fine, axis=0, overwrite_x=True, **_fftargs)
        #     if verbose == 'very':
        #         print("... dedispersed", end="")

        chan = vals.reshape(-1, nchan)
        if verbose == 'very':
            print("... power", end="")
        power = chan.real**2+chan.imag**2

        # current sample positions in stream
        isr = j*ntint + np.arange(ntint)

        if do_waterfall:
            # loop over corresponding positions in waterfall
            for iw in range(isr[0]//ntw, isr[-1]//ntw + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:,iw] += np.sum(power[isr//ntw == iw],
                                              axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr*dtsample).value  # times since start

            ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                iphase = np.remainder(phase*ngate,
                                      ngate).astype(int)
                # sum and count samples by phase bin
                foldspec[k,:,ibin] += np.bincount(iphase, power[:,k], ngate)
                icount[k,:,ibin] += np.bincount(iphase, None, ngate)

            if verbose == 'very':
                print("... folded", end="")

            if 0:  # disabled; done in gmrt.py (original condition: (j+1)*ntbin//nt > ibin, i.e. last addition to bin)
                # get normalised flux in each bin (where any were added)
                nonzero = icount > 0
                nfoldspec = np.where(nonzero, foldspec/icount, 0.)
                # subtract phase average and store
                nfoldspec -= np.where(nonzero,
                                      np.sum(nfoldspec, 1, keepdims=True) /
                                      np.sum(nonzero, 1, keepdims=True), 0)
                foldspec2[:,:,ibin] = nfoldspec
                # reset for next iteration
                foldspec *= 0
                icount *= 0
                if verbose == 'very':
                    print("... added", end="")
        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    if 0:  # disabled; done in gmrt.py (original condition: do_waterfall)
        nonzero = waterfall == 0.
        waterfall -= np.where(nonzero,
                              np.sum(waterfall, 1, keepdims=True) /
                              np.sum(nonzero, 1, keepdims=True), 0.)

    return foldspec, icount, waterfall
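The waterfall loop above sums detected power over groups of ntw consecutive time samples, per channel. A self-contained sketch of that accumulation with synthetic data (all sizes assumed):

import numpy as np

# Sketch of the waterfall accumulation: sum power over ntw-sample time bins.
nchan, ntint, ntw = 8, 4096, 64
power = np.random.rand(ntint, nchan).astype(np.float32)   # power[time, channel]
isr = np.arange(ntint)                                     # sample index in stream
nwsize = ntint // ntw
waterfall = np.zeros((nchan, nwsize))

for iw in range(isr[0] // ntw, isr[-1] // ntw + 1):
    if iw < nwsize:   # add the sum of the samples falling in this waterfall bin
        waterfall[:, iw] += np.sum(power[isr // ntw == iw], axis=0)
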
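The fold() variants differ in how they spread the nt blocks over MPI ranks: the large fold() earlier takes contiguous blocks per rank, while the GMRT version interleaves them round-robin. A tiny sketch of the two index schemes (pure Python; the nt and size values are assumptions):

# Sketch: two ways nt blocks get split over MPI ranks in the fold() variants.
nt, size = 10, 3

for rank in range(size):
    # contiguous blocks per rank (as in the MPI fold() with seek_record_read)
    per_node = (nt - 1) // size + 1
    contiguous = list(range(rank * per_node, min((rank + 1) * per_node, nt)))
    # round-robin interleaving (as in the GMRT fold(), which seeks every record)
    round_robin = list(range(rank, nt, size))
    print(rank, contiguous, round_robin)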