def fold(file1, file2, dtype, fbottom, fwidth, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         coherent=False, do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100):
    """Fold pre-channelized LOFAR data, possibly dedispersing it

    file1, file2 : string
        names of the files holding real and imaginary subchannel timeseries
    dtype : numpy dtype
        way the data are stored in the file (normally '>f4')
    fbottom : float
        frequency of the lowest channel (frequency units)
    fwidth : float
        channel width (frequency units, normally 200*u.MHz/1024.)
    nchan : int
        number of frequency channels
    nt, ntint : int
        number nt of sets to use, each containing ntint samples;
        hence, total # of samples used is nt*(2*ntint).
    nskip : int
        number of bytes to skip before reading
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        the start of the file that is read (i.e., including nskip)
    coherent : bool
        accepted for interface compatibility with the other fold variants;
        not used here (dedispersion is per channel only)
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets

    Returns
    -------
    foldspec2 : array, shape (nchan, ngate, ntbin)
        mean-subtracted folded spectrum (all zero if do_foldspec is False)
    waterfall : array, shape (nchan, nt*ntint//ntw)
        mean-subtracted waterfall (all zero if do_waterfall is False)
    """

    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # # of items to read from file.
    itemsize = np.dtype(dtype).itemsize
    count = nchan*ntint
    if verbose:
        print('Reading from {}\n         and {}'.format(file1, file2))

    # number of sets read successfully; reported at the end, and well
    # defined even if the loop never runs or stops early
    nread = 0

    with open(file1, 'rb', count*itemsize) as fh1, \
         open(file2, 'rb', count*itemsize) as fh2:

        if nskip > 0:
            if verbose:
                print('Skipping {0} bytes'.format(nskip))
            # actually move past the skipped bytes; tstart below assumes it
            fh1.seek(nskip)
            fh2.seek(nskip)

        foldspec = np.zeros((nchan, ngate))
        icount = np.zeros((nchan, ngate))

        dtsample = (1./fwidth).to(u.s)
        # time offset of the first sample read: the integer number of
        # skipped time samples times the sample time.  The parentheses
        # matter: without them the floor divisions act on the time itself.
        tstart = dtsample * (nskip // itemsize // nchan)

        # pre-calculate time delay due to dispersion in course channels
        freq = fbottom + fwidth * np.arange(nchan)
        dt = (dispersion_delay_constant * dm *
              (1./freq**2 - 1./fref**2)).to(u.s).value

        for j in range(nt):
            if verbose and j % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j+1, nt, (tstart+dtsample*j*ntint).value))
                # time since start of file

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data stored as series of floats in two files,
                # one for real and one for imaginary
                raw1 = fromfile(fh1, dtype, count).reshape(-1,nchan)
                raw2 = fromfile(fh2, dtype, count).reshape(-1,nchan)
            except EOFError:
                break

            nread = j + 1

            power = raw1**2+raw2**2
            # power[#int, #block]

            # current sample positions in stream
            isr = j*ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in range(isr[0]//ntw, isr[-1]//ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:,iw] += np.sum(power[isr//ntw == iw],
                                                  axis=0)

            if do_foldspec:
                tsample = (tstart + dtsample * isr).value  # times since start

                for k in range(nchan):
                    t = tsample - dt[k]  # dedispersed times
                    phase = phasepol(t)  # corresponding PSR phases
                    iphase = np.remainder(phase*ngate,
                                          ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k] += np.bincount(iphase, power[:,k], ngate)
                    icount[k] += np.bincount(iphase, None, ngate)

                ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1
                if (j+1)*ntbin//nt > ibin:  # last addition to bin?
                    # get normalised flux in each bin (where any were added)
                    nonzero = icount > 0
                    nfoldspec = np.where(nonzero, foldspec/icount, 0.)
                    # subtract phase average and store
                    nfoldspec -= np.where(nonzero,
                                          np.sum(nfoldspec, 1, keepdims=True) /
                                          np.sum(nonzero, 1, keepdims=True), 0)
                    foldspec2[:,:,ibin] = nfoldspec
                    # reset for next iteration
                    foldspec *= 0
                    icount *= 0

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(nread, nt))

    if do_waterfall:
        # subtract the per-channel mean from the bins that received data;
        # bins that are still zero are left untouched
        nonzero = waterfall != 0.
        # guard the divisor so a completely empty channel does not give 0/0
        nfilled = np.maximum(np.sum(nonzero, 1, keepdims=True), 1)
        waterfall -= np.where(nonzero,
                              np.sum(waterfall, 1, keepdims=True) / nfilled,
                              0.)

    return foldspec2, waterfall
def fold(file1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nhead, ngate, ntbin, ntw, dm, fref, phasepol,
         coherent=False, do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100):
    """FFT ARO data, fold by phase/time and make a waterfall series

    file1 : string
        name of the file holding voltage timeseries
    dtype : numpy dtype or '4bit' or '1bit'
        way the data are stored in the file
        (the record size is only defined below for np.int8 and '4bit')
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nhead : int
        number of bytes to skip before reading (usually 0 for ARO)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        start of part of the file that is read (i.e., ignoring nhead)
    coherent : bool
        whether to dedisperse the voltage stream before channelizing,
        instead of shifting each channel by its dispersion delay
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets

    Returns
    -------
    foldspec2 : array, shape (nchan, ngate, ntbin)
        mean-subtracted folded spectrum (all zero if do_foldspec is False)
    waterfall : array, shape (nchan, nt*ntint//ntw)
        mean-subtracted waterfall (all zero if do_waterfall is False)
    """

    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (simple for ARO: 1 byte/sample)
    recsize = nchan*ntint*{np.int8: 2, '4bit': 1}[dtype]
    if verbose:
        print('Reading from {}'.format(file1))

    # number of sets read successfully; reported at the end, and well
    # defined even if the loop never runs or stops early
    nread = 0

    with open(file1, 'rb', recsize) as fh1:

        if nhead > 0:
            if verbose:
                print('Skipping {0} bytes'.format(nhead))
            # actually move past the header announced above
            fh1.seek(nhead)

        # power sums are floating point (bincount with weights returns
        # floats), so accumulate them in a float array; only the sample
        # counts are integers
        foldspec = np.zeros((nchan, ngate))
        icount = np.zeros((nchan, ngate), dtype=int)

        dt1 = (1./samplerate).to(u.s)
        if coherent:
            # pre-calculate required turns due to dispersion
            fcoh = (fedge - rfftfreq(nchan*ntint, dt1.value) * u.Hz
                    if fedge_at_top else
                    fedge + rfftfreq(nchan*ntint, dt1.value) * u.Hz)
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1./fref-1./fcoh)**2) * 360. * u.deg
            # NOTE(review): the end of this expression (conjugate, cast and
            # real view) is reconstructed to match the re-ordering comment
            # below and the LOFAR variant -- confirm sign and dtype, and
            # that the length matches the packed rfft output it multiplies.
            dedisperse = np.exp(dang.to(u.rad).value * 1j
                                ).conj().astype(np.complex64).view(np.float32)
            # get these back into order r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
            dedisperse = np.hstack([dedisperse[:1], dedisperse[2:-1]])
        else:
            # pre-calculate time delay due to dispersion;
            # [::2] sets frequency channels to numerical recipes ordering
            freq = (fedge - rfftfreq(nchan*2, dt1.value)[::2] * u.Hz
                    if fedge_at_top else
                    fedge + rfftfreq(nchan*2, dt1.value)[::2] * u.Hz)

            dt = (dispersion_delay_constant * dm *
                  (1./freq**2 - 1./fref**2)).to(u.s).value

        # need 2*nchan samples for each FFT
        dtsample = (nchan*2/samplerate).to(u.s).value

        for j in range(nt):
            if verbose and (j+1) % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j+1, nt, dtsample*j*ntint))   # equivalent time since start

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data just a series of bytes, each containing one 8 bit or
                # two 4-bit samples (set by dtype in caller)
                raw = fromfile(fh1, dtype, recsize)
            except(EOFError, IOError) as exc:
                print("Hit {}; writing pgm's".format(exc))
                break

            nread = j + 1

            vals = raw.astype(np.float32)
            if coherent:
                fine = rfft(vals, axis=0, overwrite_x=True)
                fine *= dedisperse
                vals = irfft(fine, axis=0, overwrite_x=True)

            # squared FFT components of each block of 2*nchan samples
            chan2 = rfft(vals.reshape(-1, nchan*2), axis=-1,
                         overwrite_x=True)**2
            # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
            # re-order to Num.Rec. format: Re[0], Re[n/2], Re[1], ....
            # i.e. the two purely real terms share the first channel and
            # every other channel is Re**2 + Im**2
            power = np.hstack((chan2[:,:1]+chan2[:,-1:],
                               chan2[:,1:-1].reshape(-1,nchan-1,2).sum(-1)))
            # power[#int, #chan]

            # current sample positions in stream
            isr = j*ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in range(isr[0]//ntw, isr[-1]//ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:,iw] += np.sum(power[isr//ntw == iw],
                                                  axis=0)

            if do_foldspec:
                tsample = dtsample*isr  # times since start

                for k in range(nchan):
                    if coherent:
                        t = tsample  # already dedispersed
                    else:
                        t = tsample - dt[k]  # dedispersed times

                    phase = phasepol(t)  # corresponding PSR phases
                    iphase = np.remainder(phase*ngate,
                                          ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k] += np.bincount(iphase, power[:,k], ngate)
                    icount[k] += np.bincount(iphase, None, ngate)

                ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1
                if (j+1)*ntbin//nt > ibin:  # last addition to bin?
                    # get normalised flux in each bin (where any were added)
                    nonzero = icount > 0
                    nfoldspec = np.where(nonzero, foldspec/icount, 0.)
                    # subtract phase average and store
                    nfoldspec -= np.where(nonzero,
                                          np.sum(nfoldspec, 1, keepdims=True) /
                                          np.sum(nonzero, 1, keepdims=True), 0)
                    foldspec2[:,:,ibin] = nfoldspec
                    # reset for next iteration
                    foldspec *= 0
                    icount *= 0

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(nread, nt))

    if do_waterfall:
        # subtract the per-channel mean from the bins that received data;
        # bins that are still zero are left untouched
        nonzero = waterfall != 0.
        # guard the divisor so a completely empty channel does not give 0/0
        nfilled = np.maximum(np.sum(nonzero, 1, keepdims=True), 1)
        waterfall -= np.where(nonzero,
                              np.sum(waterfall, 1, keepdims=True) / nfilled,
                              0.)

    return foldspec2, waterfall
def fold(file1, file2, dtype, fbottom, fwidth, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         coherent=False, do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, comm=None):
    """Fold pre-channelized LOFAR data, possibly dedispersing it

    file1, file2 : string
        names of the files holding real and imaginary subchannel timeseries
    dtype : numpy dtype
        way the data are stored in the file (normally '>f4')
    fbottom : float
        frequency of the lowest channel (frequency units)
    fwidth : float
        channel width (frequency units, normally 200*u.MHz/1024.)
    nchan : int
        number of frequency channels
    nt, ntint : int
        number nt of sets to use, each containing ntint samples;
        hence, total # of samples used is nt*ntint for each channel.
    nskip : int
        number of records (nskip*ntint*4*nchan bytes) to skip before reading
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        the start of the file that is read (i.e., including nskip)
    coherent : bool
        Whether to do dispersion coherently within finer channels
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    comm : MPI communicator (default: None)
        if given, sets are divided over the processes: each one handles
        sets rank, rank+size, rank+2*size, ...

    Returns
    -------
    foldspec : array, shape (nchan, ngate, ntbin), or None
        summed power per channel, phase gate and time bin
        (None if do_foldspec is False)
    icount : array, shape (nchan, ngate, ntbin), or None
        number of samples that went into each element of foldspec
    waterfall : array, shape (nchan, nt*ntint//ntw), or None
        mean-subtracted waterfall (None if do_waterfall is False)
    """
    if comm is None:
        rank = 0
        size = 1

        def open_data(comm, name):
            # serial stand-in with the same call signature as the MPI
            # opener; binary mode since the files hold raw samples
            return open(name, 'rb')
    else:
        rank = comm.rank
        size = comm.size
        # NOTE(review): the MPI-aware opener `mpilofile` is assumed to be
        # available at module scope -- confirm.  The serial stand-in has
        # its own name so that it cannot shadow it inside this function.
        open_data = mpilofile

    # initialize folded spectrum and waterfall
    if do_foldspec:
        foldspec = np.zeros((nchan, ngate, ntbin))
        icount = np.zeros((nchan, ngate, ntbin))
    else:
        foldspec = None
        icount = None
    if do_waterfall:
        nwsize = nt*ntint//ntw
        waterfall = np.zeros((nchan, nwsize))
    else:
        waterfall = None

    # # of items to read from file.
    itemsize = np.dtype(dtype).itemsize
    count = nchan*ntint
    if verbose and rank == 0:
        print('Reading from {}\n         and {}'.format(file1, file2))

    # index+1 of the last set this process read; reported at the end, and
    # well defined even if this process got no sets at all
    nread = 0

    with open_data(comm, file1) as fh1, \
         open_data(comm, file2) as fh2:
        if nskip > 0:
            if verbose and rank == 0:
                print('Skipping {0} bytes'.format(nskip))
            # if # MPI processes > 1 we seek in for-loop
            if size == 1:
                fh1.seek(nskip * count * itemsize)
                fh2.seek(nskip * count * itemsize)

        dtsample = (1./fwidth).to(u.s)
        tstart = dtsample * nskip * ntint

        # pre-calculate time delay due to dispersion in course channels
        freq = fbottom + fwidth*np.arange(nchan)
        dt = (dispersion_delay_constant * dm *
              (1./freq**2 - 1./fref**2)).to(u.s).value

        if coherent:
            # pre-calculate required turns due to dispersion in fine channels
            fcoh = (freq[np.newaxis,:] +
                    fftfreq(ntint, dtsample.value)[:,np.newaxis] * u.Hz)
            # fcoh[fine, channel]
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1./freq - 1./fcoh)**2) * u.cycle
            # NOTE(review): complex64 is assumed as the target type (the
            # data are float32 below) -- confirm.
            dedisperse = np.exp(dang.to(u.rad).value * 1j
                                ).conj().astype(np.complex64)

        for j in range(rank, nt, size):
            if verbose and j % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j+1, nt, (tstart+dtsample*j*ntint).value))
                # time since start of file

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data stored as series of floats in two files,
                # one for real and one for imaginary
                if size > 1:
                    fh1.seek((nskip + j)*count*itemsize)
                    fh2.seek((nskip + j)*count*itemsize)
                raw1 = fromfile(fh1, dtype, count*itemsize).reshape(-1,nchan)
                raw2 = fromfile(fh2, dtype, count*itemsize).reshape(-1,nchan)
            except EOFError:
                break

            nread = j + 1

            # int 8 test: scale by 128, truncate to int8 and scale back,
            # so the data only keep 8-bit resolution
            iraw = (raw1*128.).astype(np.int8)
            raw1 = iraw.astype(np.float32)/128.
            iraw = (raw2*128.).astype(np.int8)
            raw2 = iraw.astype(np.float32)/128.

            if coherent:
                chan = raw1 + 1j*raw2
                # vals[#int, #chan]; FT channels to finely spaced grid
                fine = fft(chan, axis=0, overwrite_x=True)
                # fine[#fine, #chan]; correct for dispersion w/i chan
                fine *= dedisperse
                # fine[#fine, #chan]; FT back to channel timeseries
                chan = ifft(fine, axis=0, overwrite_x=True)
                # vals[#int, #chan]
                power = chan.real**2 + chan.imag**2
                # power[#int, #chan]; timeit -> 0.6x shorter than abs(chan)**2
            else:
                power = raw1**2 + raw2**2
                # power[#int, #chan]

            # current sample positions in stream
            isr = j*ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in range(isr[0]//ntw, isr[-1]//ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:,iw] += np.sum(power[isr//ntw == iw],
                                                  axis=0)

            if do_foldspec:
                tsample = (tstart + isr*dtsample).value  # times since start
                ibin = j*ntbin//nt
                for k in range(nchan):
                    t = tsample - dt[k]  # dedispersed times
                    phase = phasepol(t)  # corresponding PSR phases
                    iphase = np.remainder(phase*ngate,
                                          ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k,:,ibin] += np.bincount(iphase, power[:,k], ngate)
                    icount[k,:,ibin] += np.bincount(iphase, None, ngate)

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(nread, nt))

    if do_waterfall:
        # subtract the per-channel mean from the bins that received data;
        # bins that are still zero are left untouched
        nonzero = waterfall != 0.
        # guard the divisor so a completely empty channel does not give 0/0
        nfilled = np.maximum(np.sum(nonzero, 1, keepdims=True), 1)
        waterfall -= np.where(nonzero,
                              np.sum(waterfall, 1, keepdims=True) / nfilled,
                              0.)

    return foldspec, icount, waterfall
 def record_read(self, count):
     """Read one record from this file and arrange it by channel.

     The data are read with ``fromfile`` using ``self.dtype``; `count` is
     passed on unchanged (presumably a number of items or bytes -- check
     against ``fromfile``).  The result is reshaped to rows of
     ``self.nchan`` values and axes of length one are squeezed out.
     """
     samples = fromfile(self, self.dtype, count)
     by_channel = samples.reshape(-1, self.nchan)
     return by_channel.squeeze()
def fold(file1, samplerate, fmid, nchan,
         nt, ntint, nhead, ngate, ntbin, ntw, dm, fref, phasepol,
         coherent=False, do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100):
    """FFT Effelsberg data, fold by phase/time and make a waterfall series

    file1 : string
        name of the file holding voltage timeseries
        (opened with gzip if the name contains '.gz')
    samplerate : float
        rate at which samples were originally taken and thus band width
        (frequency units)
    fmid : float
        mid point of the frequency band (frequency units)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*(2*ntint), with each sample containing
        real,imag for two polarisations
    nhead : int
        number of bytes to skip before reading (usually 4096 for Effelsberg)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        start of part of the file that is read (i.e., ignoring nhead)
    coherent : bool
        Whether to do dispersion coherently within finer channels
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets

    Returns
    -------
    foldspec2 : array, shape (nchan, ngate, ntbin)
        mean-subtracted folded spectrum, with frequency increasing
        monotonically (all zero if do_foldspec is False)
    waterfall : array, shape (nchan, nt*ntint//ntw)
        mean-subtracted waterfall, with frequency increasing monotonically
        (all zero if do_waterfall is False)
    """

    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (each nchan contains 4 bytes:
    # real,imag for 2 polarisations).
    recsize = 4*nchan*ntint
    if verbose:
        print('Reading from {}'.format(file1))

    # the third positional argument means "buffer size" for open but
    # "compression level" for gzip.open, so only pass recsize to open
    if '.gz' in file1:
        source = gzip.open(file1, 'rb')
    else:
        source = open(file1, 'rb', recsize)

    # number of sets read successfully; reported at the end, and well
    # defined even if the loop never runs or stops early
    nread = 0

    with source as fh1:

        if nhead > 0:
            if verbose:
                print('Skipping {0} bytes'.format(nhead))
            # actually move past the header announced above
            fh1.seek(nhead)

        foldspec = np.zeros((nchan, ngate))
        icount = np.zeros((nchan, ngate))

        dt1 = (1./samplerate).to(u.s)

        dtsample = nchan * dt1

        # pre-calculate time delay due to dispersion in course channels
        freq = fmid + fftfreq(nchan, dt1.value) * u.Hz
        dt = (dispersion_delay_constant * dm *
              (1./freq**2 - 1./fref**2)).to(u.s).value
        if coherent:
            # pre-calculate required turns due to dispersion in fine channels
            fcoh = (freq +
                    fftfreq(ntint, dtsample.value)[:,np.newaxis] * u.Hz)
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1./freq-1./fcoh)**2) * 360. * u.deg
            # NOTE(review): the conjugate and complex64 cast are taken over
            # from the LOFAR variant of this routine -- confirm.
            dedisperse = np.exp(dang.to(u.rad).value * 1j
                                ).conj().astype(np.complex64)

        for j in range(nt):
            if verbose and j % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j+1, nt, dtsample.value*j*ntint))   # time since start

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data stored as series of two two-byte complex numbers,
                # one for each polarization
                raw = fromfile(fh1, np.int8, recsize).reshape(-1,2,2)
            except EOFError:
                break

            nread = j + 1

            # use view for fast conversion from float to complex
            vals = raw.astype(np.float32).view(np.complex64).reshape(
                -1, nchan, 2)
            # vals[#int, #block, #pol]
            if coherent:
                # FT to channels, then FT those to finely spaced grid
                fine = fft2(vals, axes=(0,1), overwrite_x=True)
                # fine[#fine, #chan, #pol]; correct for dispersion w/i chan
                fine *= dedisperse[:,:,np.newaxis]
                # fine[#fine, #chan, #pol]; FT back to channel timeseries
                chan = ifft(fine, axis=0, overwrite_x=True)
                # vals[#int, #block, #pol]
            else:
                # FT channels
                chan = fft(vals, axis=1, overwrite_x=True)
                # chan[#int, #chan, #pol]

            power = np.sum(chan.real**2+chan.imag**2, axis=-1)
            # power[#int, #block]

            # current sample positions in stream
            isr = j*ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in range(isr[0]//ntw, isr[-1]//ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:,iw] += np.sum(power[isr//ntw == iw],
                                                  axis=0)

            if do_foldspec:
                tsample = dtsample.value * isr  # times since start

                for k in range(nchan):
                    t = tsample - dt[k]  # dedispersed times
                    phase = phasepol(t)  # corresponding PSR phases
                    iphase = np.remainder(phase*ngate,
                                          ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k] += np.bincount(iphase, power[:,k], ngate)
                    icount[k] += np.bincount(iphase, None, ngate)

                ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1
                if (j+1)*ntbin//nt > ibin:  # last addition to bin?
                    # get normalised flux in each bin (where any were added)
                    nonzero = icount > 0
                    nfoldspec = np.where(nonzero, foldspec/icount, 0.)
                    # subtract phase average and store
                    nfoldspec -= np.where(nonzero,
                                          np.sum(nfoldspec, 1, keepdims=True) /
                                          np.sum(nonzero, 1, keepdims=True), 0)
                    foldspec2[:,:,ibin] = nfoldspec
                    # reset for next iteration
                    foldspec *= 0
                    icount *= 0

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(nread, nt))

    if do_foldspec:
        # swap two halfs in frequency, so that freq increases monotonically
        foldspec2 = fftshift(foldspec2, axes=0)

    if do_waterfall:
        # subtract the per-channel mean from the bins that received data;
        # bins that are still zero are left untouched
        nonzero = waterfall != 0.
        # guard the divisor so a completely empty channel does not give 0/0
        nfilled = np.maximum(np.sum(nonzero, 1, keepdims=True), 1)
        waterfall -= np.where(nonzero,
                              np.sum(waterfall, 1, keepdims=True) / nfilled,
                              0.)
        # swap two halfs in frequency, so that freq increases monotonically
        waterfall = fftshift(waterfall, axes=0)

    return foldspec2, waterfall
def fold(file1, file2, dtype, fbottom, fwidth, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         coherent=False, do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, comm=None):
    """Fold pre-channelized LOFAR data, possibly dedispersing it

    file1, file2 : string
        names of the files holding real and imaginary subchannel timeseries
    dtype : numpy dtype
        way the data are stored in the file (normally '>f4')
    fbottom : float
        frequency of the lowest channel (frequency units)
    fwidth : float
        channel width (frequency units, normally 200*u.MHz/1024.)
    nchan : int
        number of frequency channels
    nt, ntint : int
        number nt of sets to use, each containing ntint samples;
        hence, total # of samples used is nt*ntint for each channel.
    nskip : int
        number of records (nskip*ntint*4*nchan bytes) to skip before reading
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        the start of the file that is read (i.e., including nskip)
    coherent : bool
        Whether to do dispersion coherently within finer channels
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    comm : MPI communicator (default: None)
        if given, sets are divided over the processes: each one handles
        sets rank, rank+size, rank+2*size, ...

    Returns
    -------
    foldspec : array, shape (nchan, ngate, ntbin), or None
        summed power per channel, phase gate and time bin
        (None if do_foldspec is False)
    icount : array, shape (nchan, ngate, ntbin), or None
        number of samples that went into each element of foldspec
    waterfall : array, shape (nchan, nt * ntint // ntw), or None
        mean-subtracted waterfall (None if do_waterfall is False)
    """
    if comm is None:
        rank = 0
        size = 1

        def open_data(comm, name):
            # serial stand-in with the same call signature as the MPI
            # opener; binary mode since the files hold raw samples
            return open(name, 'rb')
    else:
        rank = comm.rank
        size = comm.size
        # NOTE(review): the MPI-aware opener `mpilofile` is assumed to be
        # available at module scope -- confirm.  The serial stand-in has
        # its own name so that it cannot shadow it inside this function.
        open_data = mpilofile

    # initialize folded spectrum and waterfall
    if do_foldspec:
        foldspec = np.zeros((nchan, ngate, ntbin))
        icount = np.zeros((nchan, ngate, ntbin))
    else:
        foldspec = None
        icount = None
    if do_waterfall:
        nwsize = nt * ntint // ntw
        waterfall = np.zeros((nchan, nwsize))
    else:
        waterfall = None

    # # of items to read from file.
    itemsize = np.dtype(dtype).itemsize
    count = nchan * ntint
    if verbose and rank == 0:
        print('Reading from {}\n         and {}'.format(file1, file2))

    # index+1 of the last set this process read; reported at the end, and
    # well defined even if this process got no sets at all
    nread = 0

    with open_data(comm, file1) as fh1, \
         open_data(comm, file2) as fh2:
        if nskip > 0:
            if verbose and rank == 0:
                print('Skipping {0} bytes'.format(nskip))
            # if # MPI processes > 1 we seek in for-loop
            if size == 1:
                fh1.seek(nskip * count * itemsize)
                fh2.seek(nskip * count * itemsize)

        dtsample = (1. / fwidth).to(u.s)
        tstart = dtsample * nskip * ntint

        # pre-calculate time delay due to dispersion in course channels
        freq = fbottom + fwidth * np.arange(nchan)
        dt = (dispersion_delay_constant * dm *
              (1. / freq**2 - 1. / fref**2)).to(u.s).value

        if coherent:
            # pre-calculate required turns due to dispersion in fine channels
            fcoh = (freq[np.newaxis, :] +
                    fftfreq(ntint, dtsample.value)[:, np.newaxis] * u.Hz)
            # fcoh[fine, channel]
            # (check via eq. 5.21 and following in
            # Lorimer & Kramer, Handbook of Pulsar Astronomy)
            dang = (dispersion_delay_constant * dm * fcoh *
                    (1. / freq - 1. / fcoh)**2) * u.cycle
            # NOTE(review): complex64 is assumed as the target type (the
            # data are float32 below) -- confirm.
            dedisperse = np.exp(dang.to(u.rad).value * 1j).conj().astype(
                np.complex64)

        for j in range(rank, nt, size):
            if verbose and j % progress_interval == 0:
                print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                    j + 1, nt, (tstart + dtsample * j * ntint).value))
                # time since start of file

            # just in case numbers were set wrong -- break if file ends
            # better keep at least the work done
            try:
                # data stored as series of floats in two files,
                # one for real and one for imaginary
                if size > 1:
                    fh1.seek((nskip + j) * count * itemsize)
                    fh2.seek((nskip + j) * count * itemsize)
                raw1 = fromfile(fh1, dtype,
                                count * itemsize).reshape(-1, nchan)
                raw2 = fromfile(fh2, dtype,
                                count * itemsize).reshape(-1, nchan)
            except (EOFError):
                break

            nread = j + 1

            # int 8 test: scale by 128, truncate to int8 and scale back,
            # so the data only keep 8-bit resolution
            iraw = (raw1 * 128.).astype(np.int8)
            raw1 = iraw.astype(np.float32) / 128.
            iraw = (raw2 * 128.).astype(np.int8)
            raw2 = iraw.astype(np.float32) / 128.

            if coherent:
                chan = raw1 + 1j * raw2
                # vals[#int, #chan]; FT channels to finely spaced grid
                fine = fft(chan, axis=0, overwrite_x=True)
                # fine[#fine, #chan]; correct for dispersion w/i chan
                fine *= dedisperse
                # fine[#fine, #chan]; FT back to channel timeseries
                chan = ifft(fine, axis=0, overwrite_x=True)
                # vals[#int, #chan]
                power = chan.real**2 + chan.imag**2
                # power[#int, #chan]; timeit -> 0.6x shorter than abs(chan)**2
            else:
                power = raw1**2 + raw2**2
                # power[#int, #chan]

            # current sample positions in stream
            isr = j * ntint + np.arange(ntint)

            if do_waterfall:
                # loop over corresponding positions in waterfall
                for iw in range(isr[0] // ntw, isr[-1] // ntw + 1):
                    if iw < nwsize:  # add sum of corresponding samples
                        waterfall[:, iw] += np.sum(power[isr // ntw == iw],
                                                   axis=0)

            if do_foldspec:
                tsample = (tstart + isr * dtsample).value  # times since start
                ibin = j * ntbin // nt
                for k in range(nchan):
                    t = tsample - dt[k]  # dedispersed times
                    phase = phasepol(t)  # corresponding PSR phases
                    iphase = np.remainder(phase * ngate, ngate).astype(int)
                    # sum and count samples by phase bin
                    foldspec[k, :,
                             ibin] += np.bincount(iphase, power[:, k], ngate)
                    icount[k, :, ibin] += np.bincount(iphase, None, ngate)

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(nread, nt))

    if do_waterfall:
        # subtract the per-channel mean from the bins that received data;
        # bins that are still zero are left untouched
        nonzero = waterfall != 0.
        # guard the divisor so a completely empty channel does not give 0/0
        nfilled = np.maximum(np.sum(nonzero, 1, keepdims=True), 1)
        waterfall -= np.where(
            nonzero,
            np.sum(waterfall, 1, keepdims=True) / nfilled, 0.)

    return foldspec, icount, waterfall
 def record_read(self, count):
     """Read one record from this file and arrange it by channel.

     The data are read with ``fromfile`` using ``self.dtype``; `count` is
     passed on unchanged (presumably a number of items or bytes -- check
     against ``fromfile``).  The result is reshaped to rows of
     ``self.nchan`` values and axes of length one are squeezed out.
     """
     samples = fromfile(self, self.dtype, count)
     by_channel = samples.reshape(-1, self.nchan)
     return by_channel.squeeze()
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, comm=None, dedisperse='incoherent'):
    """Fold channelized GMRT data by phase/time and make a waterfall series

    Every record read holds ntint time samples of nchan channels, stored as
    (real, imag) pairs; no FFT is done here.  The power of each sample is
    added to the waterfall and/or, after correcting for the dispersion delay
    of its channel, to the folded spectrum.

    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : numpy dtype
        way the data are stored in the file; only np.int8 is supported
        (anything else raises KeyError)
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nskip : int
        number of records (of nchan*ntint*2 bytes each) to skip
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds since the
        start of the file (the time covered by the nskip skipped records is
        included)
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or 'very'
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    comm : MPI communicator (default: None)
        if given, this process only handles sets rank, rank+size, ...;
        the partial results are not combined here
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel' (default: 'incoherent').
        Only 'coherent' changes anything here: the data are then taken to be
        dedispersed already and no per-channel delay is removed.

    Returns foldspec, icount, waterfall:
    foldspec : int array, shape (nchan, ngate, ntbin)
        summed power per channel, phase bin and time bin (not normalised)
    icount : int array, shape (nchan, ngate, ntbin)
        number of samples added to each element of foldspec
    waterfall : float array, shape (nchan, nt*ntint//ntw)
        summed power per channel and waterfall time bin (not normalised)
    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # initialize waterfall
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file: ntint samples of nchan
    # channels, each a pair of one-byte (real, imag) values
    itemsize = {np.int8: 2}[dtype]
    recsize = nchan*ntint*itemsize

    if verbose:
        print('Reading from {}'.format(fh1))

    if nskip > 0:
        if verbose:
            print('Skipping {0} {1}-byte records'.format(nskip, recsize))
        # with more than one process, seeking is done inside the loop
        if size == 1:
            fh1.seek(nskip * recsize)

    # np.int was removed from NumPy; the builtin int is what it aliased
    foldspec = np.zeros((nchan, ngate, ntbin), dtype=int)
    icount = np.zeros((nchan, ngate, ntbin), dtype=int)

    dt1 = (1./samplerate).to(u.s)
    # but include 2*nchan real-valued samples used for each FFT
    # (or, equivalently, real and imag for channels)
    dtsample = nchan * 2 * dt1
    tstart = dt1 * nskip * recsize

    # pre-calculate time delay due to dispersion in coarse channels
    freq = fftshift(fftfreq(nchan, 2.*dt1.value)) * u.Hz
    freq = (fedge - (freq-freq[0])
            if fedge_at_top
            else fedge + (freq-freq[0]))
    dt = (dispersion_delay_constant * dm *
          (1./freq**2 - 1./fref**2)).to(u.s).value
    # NOTE: coherent and by-channel dedispersion are not implemented for
    # these data; only the incoherent per-channel delay dt is applied.

    j = -1  # keeps the summary below valid if no set gets processed
    for j in range(rank, nt, size):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j+1, nt, (tstart+dtsample*j*ntint).value))   # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            if size > 1:
                fh1.seek((nskip + j) * recsize)
            # data just a series of byte pairs, of real and imag
            raw = fromfile(fh1, dtype, recsize)
        except (EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        vals = raw.astype(np.float32).view(np.complex64).squeeze()
        # chan[#int, #chan]
        chan = vals.reshape(-1, nchan)
        if verbose == 'very':
            print("... power", end="")
        power = chan.real**2+chan.imag**2

        # current sample positions in stream
        isr = j*ntint + np.arange(ntint)

        if do_waterfall:
            # waterfall bin of every sample in this set
            iwbin = isr//ntw
            # loop over corresponding positions in waterfall
            for iw in range(iwbin[0], iwbin[-1] + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[iwbin == iw], axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr*dtsample).value  # times since start

            ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                iphase = np.remainder(phase*ngate, ngate).astype(int)
                # sum and count samples by phase bin; the weighted bincount
                # is float, so cast explicitly: current NumPy refuses to
                # add floats in-place to an integer array
                foldspec[k, :, ibin] += np.bincount(
                    iphase, power[:, k], ngate).astype(foldspec.dtype)
                icount[k, :, ibin] += np.bincount(iphase, None, ngate)

            if verbose == 'very':
                print("... folded", end="")

        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    # normalisation of foldspec and waterfall is left to the caller
    return foldspec, icount, waterfall
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, comm=None, dedisperse='incoherent'):
    """Fold channelized GMRT data by phase/time and make a waterfall series

    Every record read holds ntint time samples of nchan channels, stored as
    (real, imag) pairs; no FFT is done here.  The power of each sample is
    added to the waterfall and/or, after correcting for the dispersion delay
    of its channel, to the folded spectrum.

    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : numpy dtype
        way the data are stored in the file; only np.int8 is supported
        (anything else raises KeyError)
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nskip : int
        number of records (of nchan*ntint*2 bytes each) to skip
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds since the
        start of the file (the time covered by the nskip skipped records is
        included)
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or 'very'
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    comm : MPI communicator (default: None)
        if given, this process only handles sets rank, rank+size, ...;
        the partial results are not combined here
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel' (default: 'incoherent').
        Only 'coherent' changes anything here: the data are then taken to be
        dedispersed already and no per-channel delay is removed.

    Returns foldspec, icount, waterfall:
    foldspec : int array, shape (nchan, ngate, ntbin)
        summed power per channel, phase bin and time bin (not normalised)
    icount : int array, shape (nchan, ngate, ntbin)
        number of samples added to each element of foldspec
    waterfall : float array, shape (nchan, nt*ntint//ntw)
        summed power per channel and waterfall time bin (not normalised)
    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size

    # initialize waterfall
    nwsize = nt * ntint // ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file: ntint samples of nchan
    # channels, each a pair of one-byte (real, imag) values
    itemsize = {np.int8: 2}[dtype]
    recsize = nchan * ntint * itemsize

    if verbose:
        print('Reading from {}'.format(fh1))

    if nskip > 0:
        if verbose:
            print('Skipping {0} {1}-byte records'.format(nskip, recsize))
        # with more than one process, seeking is done inside the loop
        if size == 1:
            fh1.seek(nskip * recsize)

    # np.int was removed from NumPy; the builtin int is what it aliased
    foldspec = np.zeros((nchan, ngate, ntbin), dtype=int)
    icount = np.zeros((nchan, ngate, ntbin), dtype=int)

    dt1 = (1. / samplerate).to(u.s)
    # but include 2*nchan real-valued samples used for each FFT
    # (or, equivalently, real and imag for channels)
    dtsample = nchan * 2 * dt1
    tstart = dt1 * nskip * recsize

    # pre-calculate time delay due to dispersion in coarse channels
    freq = fftshift(fftfreq(nchan, 2. * dt1.value)) * u.Hz
    offset = freq - freq[0]
    freq = fedge - offset if fedge_at_top else fedge + offset
    dt = (dispersion_delay_constant * dm *
          (1. / freq**2 - 1. / fref**2)).to(u.s).value
    # NOTE: coherent and by-channel dedispersion are not implemented for
    # these data; only the incoherent per-channel delay dt is applied.

    j = -1  # keeps the summary below valid if no set gets processed
    for j in range(rank, nt, size):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j + 1, nt,
                (tstart + dtsample * j * ntint).value))  # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            if size > 1:
                fh1.seek((nskip + j) * recsize)
            # data just a series of byte pairs, of real and imag
            raw = fromfile(fh1, dtype, recsize)
        except (EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        vals = raw.astype(np.float32).view(np.complex64).squeeze()
        # chan[#int, #chan]
        chan = vals.reshape(-1, nchan)
        if verbose == 'very':
            print("... power", end="")
        power = chan.real**2 + chan.imag**2

        # current sample positions in stream
        isr = j * ntint + np.arange(ntint)

        if do_waterfall:
            # waterfall bin of every sample in this set
            iwbin = isr // ntw
            # loop over corresponding positions in waterfall
            for iw in range(iwbin[0], iwbin[-1] + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[iwbin == iw], axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr * dtsample).value  # times since start

            ibin = j * ntbin // nt  # bin in the time series: 0..ntbin-1

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                iphase = np.remainder(phase * ngate, ngate).astype(int)
                # sum and count samples by phase bin; the weighted bincount
                # is float, so cast explicitly: current NumPy refuses to
                # add floats in-place to an integer array
                foldspec[k, :, ibin] += np.bincount(
                    iphase, power[:, k], ngate).astype(foldspec.dtype)
                icount[k, :, ibin] += np.bincount(iphase, None, ngate)

            if verbose == 'very':
                print("... folded", end="")

        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j + 1, nt))

    # normalisation of foldspec and waterfall is left to the caller
    return foldspec, icount, waterfall
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nhead, ngate, ntbin, ntw, dm, fref, phasepol,
         dedisperse='incoherent',
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100):
    """FFT ARO data, fold by phase/time and make a waterfall series

    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : numpy dtype or '4bit'
        way the data are stored in the file; np.int8 and '4bit' are
        supported (anything else raises KeyError)
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nhead : int
        number of bytes to skip before reading (usually 0 for ARO)
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds relative to
        start of part of the file that is read (i.e., ignoring nhead)
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel'.  For 'coherent' and
        'by-channel' each set is first dedispersed in the Fourier domain;
        the per-channel delay is removed while folding unless 'coherent'.
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or 'very'
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets

    Returns foldspec2, waterfall:
    foldspec2 : float array, shape (nchan, ngate, ntbin)
        mean power per channel, phase bin and time bin, with the average
        over the filled phase bins subtracted
    waterfall : float array, shape (nchan, nt*ntint//ntw)
        summed power per channel and waterfall time bin, with the average
        over the filled time bins subtracted
    """
    # initialize folded spectrum and waterfall
    foldspec2 = np.zeros((nchan, ngate, ntbin))
    nwsize = nt * ntint // ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (simple for ARO: 1 byte/sample)
    # double since we need to get ntint samples after FFT
    recsize = nchan * ntint * {np.int8: 2, '4bit': 1}[dtype]
    if verbose:
        print('Reading from {}'.format(fh1))

    if nhead > 0:
        if verbose:
            print('Skipping {0} bytes'.format(nhead))
        fh1.seek(nhead)

    # accumulate power as float: it is not integer-valued after the FFT,
    # and NumPy refuses to add floats in-place to an integer array
    foldspec = np.zeros((nchan, ngate))
    icount = np.zeros((nchan, ngate), dtype=int)

    dt1 = (1. / samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    dtsample = nchan * 2 * dt1

    # pre-calculate time delay due to dispersion in coarse channels
    band = rfftfreq(nchan * 2, dt1.value) * u.Hz
    freq = fedge - band if fedge_at_top else fedge + band
    # [::2] sets frequency channels to numerical recipes ordering
    dt = (dispersion_delay_constant * dm *
          (1. / freq[::2]**2 - 1. / fref**2)).to(u.s).value
    if dedisperse in {'coherent', 'by-channel'}:
        # pre-calculate required turns due to dispersion
        fine_band = rfftfreq(nchan * 2 * ntint, dt1.value) * u.Hz
        fcoh = fedge - fine_band if fedge_at_top else fedge + fine_band
        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            # by-channel: correct relative to each coarse channel frequency
            _fref = np.repeat(freq.value, ntint) * freq.unit
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy)
        dang = (dispersion_delay_constant * dm * fcoh *
                (1. / _fref - 1. / fcoh)**2) * 360. * u.deg
        # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
        # for 0 and n need only real part, but for 1...n-1 need real, imag
        # so just get shifts for r[1], r[2], ..., r[n-1]
        dang = dang.to(u.rad).value[1:-1:2]
        dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

    j = -1  # keeps the summary below valid if nt == 0
    for j in range(nt):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j + 1, nt, dtsample.value * j * ntint))  # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            # data just a series of bytes, each containing one 8 bit or
            # two 4-bit samples (set by dtype in caller)
            raw = fromfile(fh1, dtype, recsize)
        except (EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        vals = raw.astype(np.float32)
        if dedisperse in {'coherent', 'by-channel'}:
            fine = rfft(vals, axis=0, overwrite_x=True, **_fftargs)
            fine_cmplx = fine[1:-1].view(np.complex64)
            fine_cmplx *= dd_coh  # this overwrites parts of fine, as intended
            vals = irfft(fine, axis=0, overwrite_x=True, **_fftargs)
            if verbose == 'very':
                print("... dedispersed", end="")

        chan2 = rfft(vals.reshape(-1, nchan * 2), axis=-1,
                     overwrite_x=True, **_fftargs)**2
        # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
        # re-order to Num.Rec. format: Re[0], Re[n/2], Re[1], ....
        power = np.hstack((chan2[:, :1] + chan2[:, -1:],
                           chan2[:, 1:-1].reshape(-1, nchan - 1, 2).sum(-1)))
        if verbose == 'very':
            print("... power", end="")

        # current sample positions in stream
        isr = j * ntint + np.arange(ntint)

        if do_waterfall:
            # waterfall bin of every sample in this set
            iwbin = isr // ntw
            # loop over corresponding positions in waterfall
            for iw in range(iwbin[0], iwbin[-1] + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[iwbin == iw], axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = dtsample.value * isr  # times since start

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                iphase = np.remainder(phase * ngate, ngate).astype(int)
                # sum and count samples by phase bin
                foldspec[k] += np.bincount(iphase, power[:, k], ngate)
                icount[k] += np.bincount(iphase, None, ngate)

            if verbose == 'very':
                print("... folded", end="")

            ibin = j * ntbin // nt  # bin in the time series: 0..ntbin-1
            if (j + 1) * ntbin // nt > ibin:  # last addition to bin?
                # get normalised flux in each bin (where any were added);
                # np.where evaluates both branches, so silence the 0/0
                # warnings from the empty bins it then discards
                nonzero = icount > 0
                with np.errstate(divide='ignore', invalid='ignore'):
                    nfoldspec = np.where(nonzero, foldspec / icount, 0.)
                    # subtract phase average and store
                    nfoldspec -= np.where(
                        nonzero,
                        np.sum(nfoldspec, 1, keepdims=True) /
                        np.sum(nonzero, 1, keepdims=True), 0)
                foldspec2[:, :, ibin] = nfoldspec
                # reset for next iteration
                foldspec.fill(0)
                icount.fill(0)
                if verbose == 'very':
                    print("... added", end="")
        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j + 1, nt))

    if do_waterfall:
        # subtract the per-channel average from the bins that hold data;
        # the mask has to select the filled bins (!= 0), not the empty ones
        nonzero = waterfall != 0.
        with np.errstate(divide='ignore', invalid='ignore'):
            waterfall -= np.where(
                nonzero,
                np.sum(waterfall, 1, keepdims=True) /
                np.sum(nonzero, 1, keepdims=True), 0.)

    return foldspec2, waterfall
def fold(fh1, dtype, samplerate, fedge, fedge_at_top, nchan,
         nt, ntint, nskip, ngate, ntbin, ntw, dm, fref, phasepol,
         do_waterfall=True, do_foldspec=True, verbose=True,
         progress_interval=100, rfi_filter_raw=None, rfi_filter_power=None,
         comm=None, dedisperse='incoherent'):
    """FFT ARO data, fold by phase/time and make a waterfall series

    fh1 : file handle
        handle to file holding voltage timeseries
    dtype : numpy dtype or '4bit'
        way the data are stored in the file; np.int8 and '4bit' are
        supported (anything else raises KeyError)
    samplerate : float
        rate at which samples were originally taken and thus double the
        band width (frequency units)
    fedge : float
        edge of the frequency band (frequency units)
    fedge_at_top: bool
        whether edge is at top (True) or bottom (False)
    nchan : int
        number of frequency channels for FFT
    nt, ntint : int
        total number nt of sets, each containing ntint samples in each file
        hence, total # of samples is nt*ntint, with each sample containing
        a single polarisation
    nskip : int
        number of records (each the size of one set) to skip
    ngate, ntbin : int
        number of phase and time bins to use for folded spectrum
        ntbin should be an integer fraction of nt
    ntw : int
        number of time samples to combine for waterfall (does not have to be
        integer fraction of nt)
    dm : float
        dispersion measure of pulsar, used to correct for ism delay
        (column number density)
    fref: float
        reference frequency for dispersion measure
    phasepol : callable
        function that returns the pulsar phase for time in seconds since the
        start of the file (the time covered by the nskip skipped records is
        included)
    do_waterfall, do_foldspec : bool
        whether to construct waterfall, folded spectrum (default: True)
    verbose : bool or 'very'
        whether to give some progress information (default: True)
    progress_interval : int
        Ping every progress_interval sets
    rfi_filter_raw, rfi_filter_power : callable or None
        if given, applied to the raw samples of each set, and to the
        channelized power of each set, respectively; each has to return
        the filtered array
    comm : MPI communicator (default None)
        if given, this process only handles sets rank, rank+size, ...;
        the partial results are not combined here
    dedisperse : None or string
        None, 'incoherent', 'coherent', 'by-channel' (default: 'incoherent').
        For 'coherent' and 'by-channel' each set is first dedispersed in the
        Fourier domain; the per-channel delay is removed while folding
        unless 'coherent'.

    Returns foldspec, icount, waterfall:
    foldspec : float array, shape (nchan, ngate, ntbin)
        summed power per channel, phase bin and time bin (not normalised)
    icount : int array, shape (nchan, ngate, ntbin)
        number of samples with non-zero power added to each element
    waterfall : float array, shape (nchan, nt*ntint//ntw)
        summed power per channel and waterfall time bin (not normalised)
    """
    if comm is None:
        rank = 0
        size = 1
    else:
        rank = comm.rank
        size = comm.size
    # initialize folded spectrum and waterfall
    foldspec = np.zeros((nchan, ngate, ntbin))
    icount = np.zeros((nchan, ngate, ntbin), dtype=np.int64)
    nwsize = nt*ntint//ntw
    waterfall = np.zeros((nchan, nwsize))

    # size in bytes of records read from file (simple for ARO: 1 byte/sample)
    # double since we need to get ntint samples after FFT
    recsize = nchan*ntint*{np.int8: 2, '4bit': 1}[dtype]
    if verbose:
        print('Reading from {}'.format(fh1))

    if nskip > 0:
        if verbose:
            print('Skipping {0} records = {1} bytes'
                  .format(nskip, nskip*recsize))
        # If MPI threading, the threads hop over one-another
        # and seeking is done in for-loop.
        if size == 1:
            fh1.seek(nskip * recsize)

    dt1 = (1./samplerate).to(u.s)
    # need 2*nchan real-valued samples for each FFT
    dtsample = nchan * 2 * dt1
    tstart = dtsample * ntint * nskip

    # pre-calculate time delay due to dispersion in coarse channels
    band = rfftfreq(nchan*2, dt1.value) * u.Hz
    freq = fedge - band if fedge_at_top else fedge + band
    # [::2] sets frequency channels to numerical recipes ordering
    dt = (dispersion_delay_constant * dm *
          (1./freq[::2]**2 - 1./fref**2)).to(u.s).value
    if dedisperse in {'coherent', 'by-channel'}:
        # pre-calculate required turns due to dispersion
        fine_band = rfftfreq(nchan*2*ntint, dt1.value) * u.Hz
        fcoh = fedge - fine_band if fedge_at_top else fedge + fine_band
        # set frequency relative to which dispersion is coherently corrected
        if dedisperse == 'coherent':
            _fref = fref
        else:
            # by-channel: correct relative to each coarse channel frequency
            _fref = np.repeat(freq.value, ntint) * freq.unit
        # (check via eq. 5.21 and following in
        # Lorimer & Kramer, Handbook of Pulsar Astronomy)
        dang = (dispersion_delay_constant * dm * fcoh *
                (1./_fref-1./fcoh)**2) * 360. * u.deg
        # order of frequencies is r[0], r[1],i[1],...r[n-1],i[n-1],r[n]
        # for 0 and n need only real part, but for 1...n-1 need real, imag
        # so just get shifts for r[1], r[2], ..., r[n-1]
        dang = dang.to(u.rad).value[1:-1:2]
        dd_coh = np.exp(dang * 1j).conj().astype(np.complex64)

    j = -1  # keeps the summary below valid if no set gets processed
    for j in range(rank, nt, size):
        if verbose and j % progress_interval == 0:
            print('Doing {:6d}/{:6d}; time={:18.12f}'.format(
                j+1, nt, (tstart+dtsample*j*ntint).value))  # time since start

        # just in case numbers were set wrong -- break if file ends
        # better keep at least the work done
        try:
            # data just a series of bytes, each containing one 8 bit or
            # two 4-bit samples (set by dtype in caller)
            if size > 1:
                fh1.seek((nskip + j) * recsize)
            raw = fromfile(fh1, dtype, recsize)
        except (EOFError, IOError) as exc:
            print("Hit {}; writing pgm's".format(exc))
            break
        if verbose == 'very':
            print("Read {} items".format(raw.size), end="")

        if rfi_filter_raw:
            raw = rfi_filter_raw(raw)
            # progress chatter only when asked for, like all other steps
            if verbose == 'very':
                print("... raw RFI", end="")

        vals = raw.astype(np.float32)
        if dedisperse in {'coherent', 'by-channel'}:
            fine = rfft(vals, axis=0, overwrite_x=True, **_fftargs)
            fine_cmplx = fine[1:-1].view(np.complex64)
            fine_cmplx *= dd_coh  # this overwrites parts of fine, as intended
            vals = irfft(fine, axis=0, overwrite_x=True, **_fftargs)
            if verbose == 'very':
                print("... dedispersed", end="")

        chan2 = rfft(vals.reshape(-1, nchan*2), axis=-1,
                     overwrite_x=True, **_fftargs)**2
        # rfft: Re[0], Re[1], Im[1], ..., Re[n/2-1], Im[n/2-1], Re[n/2]
        # re-order to Num.Rec. format: Re[0], Re[n/2], Re[1], ....
        power = np.hstack((chan2[:, :1]+chan2[:, -1:],
                           chan2[:, 1:-1].reshape(-1, nchan-1, 2).sum(-1)))

        if verbose == 'very':
            print("... power", end="")

        if rfi_filter_power:
            power = rfi_filter_power(power)
            if verbose == 'very':
                print("... power RFI", end="")

        # current sample positions in stream
        isr = j*ntint + np.arange(ntint)

        if do_waterfall:
            # waterfall bin of every sample in this set
            iwbin = isr//ntw
            # loop over corresponding positions in waterfall
            for iw in range(iwbin[0], iwbin[-1] + 1):
                if iw < nwsize:  # add sum of corresponding samples
                    waterfall[:, iw] += np.sum(power[iwbin == iw], axis=0)
            if verbose == 'very':
                print("... waterfall", end="")

        if do_foldspec:
            tsample = (tstart + isr*dtsample).value  # times since start
            ibin = j*ntbin//nt  # bin in the time series: 0..ntbin-1

            for k in range(nchan):
                if dedisperse == 'coherent':
                    t = tsample  # already dedispersed
                else:
                    t = tsample - dt[k]  # dedispersed times

                phase = phasepol(t)  # corresponding PSR phases
                iphase = np.remainder(phase*ngate, ngate).astype(int)
                # sum and count samples by phase bin
                foldspec[k, :, ibin] += np.bincount(iphase, power[:, k], ngate)
                # only count samples with power (zero marks removed data);
                # a weighted bincount is float, so cast before adding it
                # in-place to the integer counts
                icount[k, :, ibin] += np.bincount(
                    iphase, power[:, k] != 0., ngate).astype(icount.dtype)

            if verbose == 'very':
                print("... folded", end="")

        if verbose == 'very':
            print("... done")

    if verbose:
        print('read {0:6d} out of {1:6d}'.format(j+1, nt))

    return foldspec, icount, waterfall