def fft_convolve(fftd_data, fftd_kern, lo, hi):
    """
    fft_convolve(fftd_data, fftd_kern, lo, hi):
        Perform a convolution with the complex floating point vectors
        'fftd_data' and 'fftd_kern'.  The returned vector will start at
        bin 'lo' (must be an integer), and go up to but not include
        bin 'hi' (also an integer).
    """
    # Note: The initial FFTs should be done like:
    # fftd_kern = rfft(kernel, -1)
    # fftd_data = rfft(data, -1)
    prod = Num.multiply(fftd_data, fftd_kern)
    # The DC and Nyquist bins are packed into the real and imaginary
    # parts of element 0, so multiply them separately
    prod.real[0] = fftd_kern.real[0] * fftd_data.real[0]
    prod.imag[0] = fftd_kern.imag[0] * fftd_data.imag[0]
    return rfft(prod, 1)[lo:hi].astype(Num.float32)
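# The routine above relies on the convolution theorem: an element-wise
# product of FFTs is a circular convolution.  A minimal standalone sketch
# of that identity with plain NumPy (not PRESTO's packed-format rfft),
# checking agreement with direct convolution away from the wrapped edges:
def _demo_fft_convolution():
    import numpy as np
    rng = np.random.default_rng(0)
    data = rng.standard_normal(64)
    kern = np.zeros(64)
    kern[:3] = 1.0  # a 3-bin boxcar, zero-padded to the data length
    circular = np.fft.irfft(np.fft.rfft(data) * np.fft.rfft(kern))
    direct = np.convolve(data, np.ones(3), mode='full')[:64]
    # The first len(kernel)-1 bins differ because of circular wrap-around
    assert np.allclose(circular[2:], direct[2:])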
def make_fftd_kerns(downfacts, fftlen):
    fftd_kerns = []
    for downfact in downfacts:
        kern = Num.zeros(fftlen, dtype=Num.float32)
        # These offsets produce kernels that give results
        # equal to scipy.signal.convolve
        if downfact % 2:  # Odd number
            kern[:downfact // 2 + 1] += 1.0
            kern[-(downfact // 2):] += 1.0
        else:  # Even number
            kern[:downfact // 2 + 1] += 1.0
            if downfact > 2:
                kern[-(downfact // 2 - 1):] += 1.0
        # The following normalization preserves the
        # RMS=1 characteristic of the data
        fftd_kerns.append(rfft(kern / Num.sqrt(downfact), -1))
    return fftd_kerns
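# The 1/sqrt(downfact) scaling keeps unit-variance noise at RMS ~= 1 after
# smoothing: each smoothed sample sums downfact independent deviates, so
# its standard deviation grows as sqrt(downfact).  A quick standalone
# check (my own sketch, plain NumPy):
def _demo_kernel_normalization():
    import numpy as np
    rng = np.random.default_rng(1)
    x = rng.standard_normal(2**16)
    for downfact in (2, 3, 30):
        kern = np.ones(downfact) / np.sqrt(downfact)
        smoothed = np.convolve(x, kern, mode='same')
        assert abs(smoothed.std() - 1.0) < 0.05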
from __future__ import print_function
import numpy as np
from presto import presto
import os
from os import path

here = path.dirname(__file__)

print("Testing FFT stuff...", end=' ')
N = 20
x = np.random.standard_normal(N)
nx = presto.rfft(presto.rfft(x, -1), 1)
assert(np.allclose(x, nx, atol=1e-6))
print("success")

print("Testing FFTW call...", end=' ')
cx = np.random.standard_normal(N).astype(np.complex64)
ncx = np.array(cx, copy=True)
presto.fftwcall(cx, -1)
presto.fftwcall(cx, 1)
assert(np.allclose(cx / N, ncx, atol=1e-6))
print("success")

print("Testing tablesixstepfft call...", end=' ')
cx = np.random.standard_normal(N).astype(np.complex64)
ncx = np.array(cx, copy=True)
presto.tablesixstepfft(cx, -1)
presto.tablesixstepfft(cx, 1)
assert(np.allclose(cx / N, ncx, atol=1e-6))
print("success")
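# The cx/N in the asserts above reflects that presto's in-place FFTs are
# unnormalized: a forward followed by an inverse transform returns N times
# the input.  A sketch of the same convention with plain NumPy
# (norm="forward" makes ifft apply no 1/N scaling):
def _demo_unnormalized_roundtrip(N=16):
    y = np.fft.ifft(np.fft.fft(np.ones(N)), norm="forward")
    assert np.allclose(y, N * np.ones(N))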
def main():
    parser = OptionParser(usage)
    parser.add_option("-x", "--xwin", action="store_true", dest="xwin",
                      default=False,
                      help="Don't make a postscript plot, just use an X-window")
    parser.add_option("-p", "--noplot", action="store_false", dest="makeplot",
                      default=True,
                      help="Look for pulses but do not generate a plot")
    parser.add_option("-m", "--maxwidth", type="float", dest="maxwidth",
                      default=0.0,
                      help="Set the max downsampling in sec (see below for default)")
    parser.add_option("-t", "--threshold", type="float", dest="threshold",
                      default=5.0,
                      help="Set a different threshold SNR (default=5.0)")
    parser.add_option("-s", "--start", type="float", dest="T_start",
                      default=0.0,
                      help="Only plot events occurring after this time (s)")
    parser.add_option("-e", "--end", type="float", dest="T_end",
                      default=1e9,
                      help="Only plot events occurring before this time (s)")
    parser.add_option("-g", "--glob", type="string", dest="globexp",
                      default=None,
                      help="Process the files from this glob expression")
    parser.add_option("-f", "--fast", action="store_true", dest="fast",
                      default=False,
                      help="Use a faster method of de-trending (2x speedup)")
    parser.add_option("-b", "--nobadblocks", action="store_false",
                      dest="badblocks", default=True,
                      help="Don't check for bad-blocks (may save strong pulses)")
    parser.add_option("-d", "--detrendlen", type="int", dest="detrendfact",
                      default=1,
                      help="Chunksize for detrending (pow-of-2 in 1000s)")
    (opts, args) = parser.parse_args()
    if len(args) == 0:
        if opts.globexp is None:
            print(full_usage)
            sys.exit(0)
        else:
            args = []
            for globexp in opts.globexp.split():
                args += glob.glob(globexp)
    useffts = True
    dosearch = True
    if opts.xwin:
        pgplot_device = "/XWIN"
    else:
        pgplot_device = ""

    fftlen = 8192    # Should be a power-of-two for best speed
    chunklen = 8000  # Must be at least max_downfact less than fftlen
    assert opts.detrendfact in [1, 2, 4, 8, 16, 32]
    detrendlen = opts.detrendfact * 1000
    if detrendlen > chunklen:
        chunklen = detrendlen
        fftlen = int(next2_to_n(chunklen))
    blocks_per_chunk = chunklen // detrendlen
    overlap = (fftlen - chunklen) // 2
    worklen = chunklen + 2 * overlap  # currently it is fftlen...

    max_downfact = 30
    default_downfacts = [2, 3, 4, 6, 9, 14, 20, 30, 45, 70, 100, 150, 220, 300]

    if args[0].endswith(".singlepulse"):
        filenmbase = args[0][:args[0].rfind(".singlepulse")]
        dosearch = False
    elif args[0].endswith(".dat"):
        filenmbase = args[0][:args[0].rfind(".dat")]
    else:
        filenmbase = args[0]

    # Don't do a search, just read results and plot
    if not dosearch:
        info, DMs, candlist, num_v_DMstr = \
            read_singlepulse_files(args, opts.threshold,
                                   opts.T_start, opts.T_end)
        orig_N, orig_dt = int(info.N), info.dt
        obstime = orig_N * orig_dt
    else:
        DMs = []
        candlist = []
        num_v_DMstr = {}

        # Loop over the input files
        for filenm in args:
            if filenm.endswith(".dat"):
                filenmbase = filenm[:filenm.rfind(".dat")]
            else:
                filenmbase = filenm
            info = infodata.infodata(filenmbase + ".inf")
            DMstr = "%.2f" % info.DM
            DMs.append(info.DM)
            N, dt = int(info.N), info.dt
            obstime = N * dt

            # Choose the maximum width to search based on time instead
            # of bins.  This helps prevent increased S/N when the
            # downsampling changes as the DM gets larger.
            if opts.maxwidth > 0.0:
                downfacts = [x for x in default_downfacts
                             if x * dt <= opts.maxwidth]
            else:
                downfacts = [x for x in default_downfacts if x <= max_downfact]
            if len(downfacts) == 0:
                downfacts = [default_downfacts[0]]

            if filenm == args[0]:
                orig_N = N
                orig_dt = dt
                if useffts:
                    fftd_kerns = make_fftd_kerns(default_downfacts, fftlen)

            if info.breaks:
                offregions = list(zip([x[1] for x in info.onoff[:-1]],
                                      [x[0] for x in info.onoff[1:]]))
                # If the last break spans to the end of the file,
                # don't read it in (it's just padding)
                if offregions[-1][1] == N - 1:
                    N = offregions[-1][0] + 1

            outfile = open(filenmbase + '.singlepulse', mode='w')

            # Compute the file length in detrendlens
            roundN = N // detrendlen * detrendlen
            numchunks = roundN // chunklen
            # Read in the file
            print('Reading "%s"...' % filenm)
            timeseries = Num.fromfile(filenm, dtype=Num.float32, count=roundN)

            # Split the timeseries into chunks for detrending
            numblocks = roundN // detrendlen
            timeseries.shape = (numblocks, detrendlen)
            stds = Num.zeros(numblocks, dtype=Num.float64)
            # de-trend the data one chunk at a time
            print('  De-trending the data and computing statistics...')
            for ii, chunk in enumerate(timeseries):
                if opts.fast:  # use median removal instead of detrending (2x speedup)
                    tmpchunk = chunk.copy()
                    tmpchunk.sort()
                    med = tmpchunk[detrendlen // 2]
                    chunk -= med
                    tmpchunk -= med
                else:
                    # The detrend calls are the most expensive in the program
                    timeseries[ii] = scipy.signal.detrend(chunk, type='linear')
                    tmpchunk = timeseries[ii].copy()
                    tmpchunk.sort()
                # The following gets rid of (hopefully) most of the
                # outlying values (i.e. power dropouts and single pulses)
                # If you throw out 5% (2.5% at bottom and 2.5% at top)
                # of random gaussian deviates, the measured stdev is ~0.871
                # of the true stdev.  Thus the 1.0/0.871=1.148 correction below.
                # The following is roughly .std() since we already removed the median
                stds[ii] = Num.sqrt(
                    (tmpchunk[detrendlen // 40:-detrendlen // 40]**2.0).sum() /
                    (0.95 * detrendlen))
            stds *= 1.148

            # sort the standard deviations and separate those with
            # very low or very high values
            sort_stds = stds.copy()
            sort_stds.sort()
            # identify the differences with the largest values (this
            # will split off the chunks with very low and very high stds)
            locut = (sort_stds[1:numblocks // 2 + 1] -
                     sort_stds[:numblocks // 2]).argmax() + 1
            hicut = (sort_stds[numblocks // 2 + 1:] -
                     sort_stds[numblocks // 2:-1]).argmax() + numblocks // 2 - 2
            std_stds = Num.std(sort_stds[locut:hicut])
            median_stds = sort_stds[(locut + hicut) // 2]
            print("    pseudo-median block standard deviation = %.2f" % (median_stds))
            if opts.badblocks:
                lo_std = median_stds - 4.0 * std_stds
                hi_std = median_stds + 4.0 * std_stds
                # Determine a list of "bad" chunks.  We will not search these.
                bad_blocks = Num.nonzero((stds < lo_std) | (stds > hi_std))[0]
                print("    identified %d bad blocks out of %d (i.e. %.2f%%)" %
                      (len(bad_blocks), len(stds),
                       100.0 * float(len(bad_blocks)) / float(len(stds))))
%.2f%%)" % \ (len(bad_blocks), len(stds), 100.0*float(len(bad_blocks))/float(len(stds)))) stds[bad_blocks] = median_stds else: bad_blocks = [] print(" Now searching...") # Now normalize all of the data and reshape it to 1-D timeseries /= stds[:, Num.newaxis] timeseries.shape = (roundN, ) # And set the data in the bad blocks to zeros # Even though we don't search these parts, it is important # because of the overlaps for the convolutions for bad_block in bad_blocks: loind, hiind = bad_block * detrendlen, (bad_block + 1) * detrendlen timeseries[loind:hiind] = 0.0 # Convert to a set for faster lookups below bad_blocks = set(bad_blocks) # Step through the data dm_candlist = [] for chunknum in range(numchunks): loind = chunknum * chunklen - overlap hiind = (chunknum + 1) * chunklen + overlap # Take care of beginning and end of file overlap issues if (chunknum == 0): # Beginning of file chunk = Num.zeros(worklen, dtype=Num.float32) chunk[overlap:] = timeseries[loind + overlap:hiind] elif (chunknum == numchunks - 1): # end of the timeseries chunk = Num.zeros(worklen, dtype=Num.float32) chunk[:-overlap] = timeseries[loind:hiind - overlap] else: chunk = timeseries[loind:hiind] # Make a set with the current block numbers lowblock = blocks_per_chunk * chunknum currentblocks = set(Num.arange(blocks_per_chunk) + lowblock) localgoodblocks = Num.asarray( list(currentblocks - bad_blocks)) - lowblock # Search this chunk if it is not all bad if len(localgoodblocks): # This is the good part of the data (end effects removed) goodchunk = chunk[overlap:-overlap] # need to pass blocks/chunklen, localgoodblocks # dm_candlist, dt, opts.threshold to cython routine # Search non-downsampled data first # NOTE: these nonzero() calls are some of the most # expensive calls in the program. Best bet would # probably be to simply iterate over the goodchunk # in C and append to the candlist there. hibins = Num.flatnonzero(goodchunk > opts.threshold) hivals = goodchunk[hibins] hibins += chunknum * chunklen hiblocks = hibins // detrendlen # Add the candidates (which are sorted by bin) for bin, val, block in zip(hibins, hivals, hiblocks): if block not in bad_blocks: time = bin * dt dm_candlist.append( candidate(info.DM, val, time, bin, 1)) # Prepare our data for the convolution if useffts: fftd_chunk = rfft(chunk, -1) # Now do the downsampling... for ii, downfact in enumerate(downfacts): if useffts: # Note: FFT convolution is faster for _all_ downfacts, even 2 goodchunk = fft_convolve(fftd_chunk, fftd_kerns[ii], overlap, -overlap) else: # The normalization of this kernel keeps the post-smoothing RMS = 1 kernel = Num.ones(downfact, dtype=Num.float32) / \ Num.sqrt(downfact) smoothed_chunk = scipy.signal.convolve( chunk, kernel, 1) goodchunk = smoothed_chunk[overlap:-overlap] #hibins = Num.nonzero(goodchunk>opts.threshold)[0] hibins = Num.flatnonzero(goodchunk > opts.threshold) hivals = goodchunk[hibins] hibins += chunknum * chunklen hiblocks = hibins // detrendlen hibins = hibins.tolist() hivals = hivals.tolist() # Now walk through the new candidates and remove those # that are not the highest but are within downfact/2 # bins of a higher signal pulse hibins, hivals = prune_related1( hibins, hivals, downfact) # Insert the new candidates into the candlist, but # keep it sorted... 
                        for bin, val, block in zip(hibins, hivals, hiblocks):
                            if block not in bad_blocks:
                                time = bin * dt
                                bisect.insort(dm_candlist,
                                              candidate(info.DM, val, time,
                                                        bin, downfact))

            # Now walk through the dm_candlist and remove the ones that
            # are within the downsample proximity of a higher
            # signal-to-noise pulse
            dm_candlist = prune_related2(dm_candlist, downfacts)
            print("  Found %d pulse candidates" % len(dm_candlist))

            # Get rid of those near padding regions
            if info.breaks:
                prune_border_cases(dm_candlist, offregions)

            # Write the pulses to an ASCII output file
            if len(dm_candlist):
                #dm_candlist.sort(cmp_sigma)
                outfile.write("# DM      Sigma      Time (s)     Sample    Downfact\n")
                for cand in dm_candlist:
                    outfile.write(str(cand))
            outfile.close()

            # Add these candidates to the overall candidate list
            for cand in dm_candlist:
                candlist.append(cand)
            num_v_DMstr[DMstr] = len(dm_candlist)

    if opts.makeplot:

        # Step through the candidates to make a SNR list
        DMs.sort()
        snrs = []
        for cand in candlist:
            if not Num.isinf(cand.sigma):
                snrs.append(cand.sigma)
        if snrs:
            maxsnr = max(int(max(snrs)), int(opts.threshold)) + 3
        else:
            maxsnr = int(opts.threshold) + 3

        # Generate the SNR histogram
        snrs = Num.asarray(snrs)
        (num_v_snr, edges) = Num.histogram(snrs,
                                           int(maxsnr - opts.threshold + 1),
                                           [opts.threshold, maxsnr])
        snrs = edges[:-1] + 0.5 * (edges[1] - edges[0])
        num_v_snr = num_v_snr.astype(Num.float32)
        num_v_snr[num_v_snr == 0.0] = 0.001

        # Generate the DM histogram
        num_v_DM = Num.zeros(len(DMs))
        for ii, DM in enumerate(DMs):
            num_v_DM[ii] = num_v_DMstr["%.2f" % DM]
        DMs = Num.asarray(DMs)

        # open the plot device
        short_filenmbase = filenmbase[:filenmbase.find("_DM")]
        if opts.T_end > obstime:
            opts.T_end = obstime
        if pgplot_device:
            ppgplot.pgopen(pgplot_device)
        else:
            if opts.T_start > 0.0 or opts.T_end < obstime:
                ppgplot.pgopen(short_filenmbase +
                               '_%.0f-%.0fs_singlepulse.ps/VPS' %
                               (opts.T_start, opts.T_end))
            else:
                ppgplot.pgopen(short_filenmbase + '_singlepulse.ps/VPS')
        ppgplot.pgpap(7.5, 1.0)  # Width in inches, aspect

        # plot the SNR histogram
        ppgplot.pgsvp(0.06, 0.31, 0.6, 0.87)
        ppgplot.pgswin(opts.threshold, maxsnr,
                       Num.log10(0.5), Num.log10(2 * max(num_v_snr)))
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCLNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, "Signal-to-Noise")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, "Number of Pulses")
        ppgplot.pgsch(1.0)
        ppgplot.pgbin(snrs, Num.log10(num_v_snr), 1)

        # plot the DM histogram
        ppgplot.pgsvp(0.39, 0.64, 0.6, 0.87)
        # Add [1] to num_v_DM in YMAX below so that YMIN != YMAX
        # when max(num_v_DM)==0
        ppgplot.pgswin(min(DMs) - 0.5, max(DMs) + 0.5,
                       0.0, 1.1 * max(num_v_DM + [1]))
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, r"DM (pc cm\u-3\d)")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, "Number of Pulses")
        ppgplot.pgsch(1.0)
        ppgplot.pgbin(DMs, num_v_DM, 1)

        # plot the SNR vs DM plot
        ppgplot.pgsvp(0.72, 0.97, 0.6, 0.87)
        ppgplot.pgswin(min(DMs) - 0.5, max(DMs) + 0.5, opts.threshold, maxsnr)
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, r"DM (pc cm\u-3\d)")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, "Signal-to-Noise")
        ppgplot.pgsch(1.0)
        cand_ts = Num.zeros(len(candlist), dtype=Num.float32)
        cand_SNRs = Num.zeros(len(candlist), dtype=Num.float32)
        cand_DMs = Num.zeros(len(candlist), dtype=Num.float32)
        for ii, cand in enumerate(candlist):
            cand_ts[ii], cand_SNRs[ii], cand_DMs[ii] = \
                cand.time, cand.sigma, cand.DM
        ppgplot.pgpt(cand_DMs, cand_SNRs, 20)

        # plot the DM vs Time plot
        ppgplot.pgsvp(0.06, 0.97, 0.08, 0.52)
        ppgplot.pgswin(opts.T_start, opts.T_end,
                       min(DMs) - 0.5, max(DMs) + 0.5)
        ppgplot.pgsch(0.8)
        ppgplot.pgbox("BCNST", 0, 0, "BCNST", 0, 0)
        ppgplot.pgmtxt('B', 2.5, 0.5, 0.5, "Time (s)")
        ppgplot.pgmtxt('L', 1.8, 0.5, 0.5, r"DM (pc cm\u-3\d)")
        # Circles are symbols 20-26 in increasing order
        snr_range = 12.0
        cand_symbols = (cand_SNRs - opts.threshold) / snr_range * 6.0 + 20.5
        cand_symbols = cand_symbols.astype(Num.int32)
        cand_symbols[cand_symbols > 26] = 26
        for ii in [26, 25, 24, 23, 22, 21, 20]:
            inds = Num.nonzero(cand_symbols == ii)[0]
            ppgplot.pgpt(cand_ts[inds], cand_DMs[inds], ii)

        # Now fill the information area
        ppgplot.pgsvp(0.05, 0.95, 0.87, 0.97)
        ppgplot.pgsch(1.0)
        ppgplot.pgmtxt('T', 0.5, 0.0, 0.0,
                       "Single pulse results for '%s'" % short_filenmbase)
        ppgplot.pgsch(0.8)
        # first row
        ppgplot.pgmtxt('T', -1.1, 0.02, 0.0, 'Source: %s' % info.object)
        ppgplot.pgmtxt('T', -1.1, 0.33, 0.0, 'RA (J2000):')
        ppgplot.pgmtxt('T', -1.1, 0.5, 0.0, info.RA)
        ppgplot.pgmtxt('T', -1.1, 0.73, 0.0, 'N samples: %.0f' % orig_N)
        # second row
        ppgplot.pgmtxt('T', -2.4, 0.02, 0.0, 'Telescope: %s' % info.telescope)
        ppgplot.pgmtxt('T', -2.4, 0.33, 0.0, 'DEC (J2000):')
        ppgplot.pgmtxt('T', -2.4, 0.5, 0.0, info.DEC)
        ppgplot.pgmtxt('T', -2.4, 0.73, 0.0,
                       r'Sampling time: %.2f \gms' % (orig_dt * 1e6))
        # third row
        if info.instrument.find("pigot") >= 0:
            instrument = "Spigot"
        else:
            instrument = info.instrument
        ppgplot.pgmtxt('T', -3.7, 0.02, 0.0, 'Instrument: %s' % instrument)
        if info.bary:
            ppgplot.pgmtxt('T', -3.7, 0.33, 0.0,
                           r'MJD\dbary\u: %.12f' % info.epoch)
        else:
            ppgplot.pgmtxt('T', -3.7, 0.33, 0.0,
                           r'MJD\dtopo\u: %.12f' % info.epoch)
        ppgplot.pgmtxt('T', -3.7, 0.73, 0.0,
                       r'Freq\dctr\u: %.1f MHz' %
                       ((info.numchan / 2 - 0.5) * info.chan_width + info.lofreq))
        ppgplot.pgiden()
        ppgplot.pgend()
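# A quick standalone check (my own sketch, not part of the pipeline) of
# the 1.148 correction used in main() above: the standard deviation of a
# unit normal truncated to its central 95% is ~0.871, and 1/0.871 ~= 1.148.
def _demo_trimmed_std_correction():
    import numpy as np
    rng = np.random.default_rng(2)
    x = np.sort(rng.standard_normal(1000000))
    trimmed = x[len(x) // 40:-len(x) // 40]  # drop 2.5% from each tail
    assert abs(trimmed.std() - 0.871) < 0.005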
r = N / (4 * numharm) + uniform(0.0, 1.0, 1)[0]  # average freq over "observation"
z = uniform(-100, 100, 1)[0]  # average fourier f-dot
w = uniform(-600, 600, 1)[0]  # fourier freq double deriv
w = 0.0                       # fourier freq double deriv
data = np.zeros_like(us)
for ii in range(numharm):
    rh = r * (ii + 1)
    zh = z * (ii + 1)
    wh = w * (ii + 1)
    r0 = rh - 0.5 * zh + wh / 12.0  # Make symmetric for all z and w
    z0 = zh - 0.5 * wh
    phss = 2.0 * np.pi * (us * (us * (us * wh / 6.0 + z0 / 2.0) + r0))
    data += np.cos(phss)
data += noiseamp * norm(N)
ft = presto.rfft(data)

offset = uniform(-1.0, 1.0, 3) * np.array([0.5, 2.0, 20.0]) / (0.5 * numharm)

a = clk()
if numharm > 1:
    [maxpow, rmax, zmax, rds] = presto.maximize_rz_harmonics(
        ft, r + offset[0], z + offset[1], numharm, norm=1.0)
else:
    [maxpow, rmax, zmax, rd] = presto.maximize_rz(
        ft, r + offset[0], z + offset[1], norm=1.0)
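# Why the "Make symmetric" substitutions above work (my own sketch, not
# part of PRESTO): with r0 = r - z/2 + w/12 and z0 = z - w/2, the
# instantaneous frequency f(u) = w*u**2/2 + z0*u + r0 averages to r over
# the normalized observation u in [0, 1], which is why the code calls r
# and z the *average* Fourier freq and f-dot.
def _demo_symmetric_phase_params():
    import numpy as np
    r, z, w = 100.0, 7.0, -30.0
    u = np.linspace(0.0, 1.0, 100001)
    f = w * u**2 / 2.0 + (z - 0.5 * w) * u + (r - 0.5 * z + w / 12.0)
    # the mean over a fine uniform grid approximates the integral of f(u)
    assert np.isclose(f.mean(), r)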
r = N / 4.0  # average freq over "observation"
#r = N/4.0 + 0.5  # average freq over "observation"
rint = num.floor(r)
dr = 1.0 / 32.0
dz = 0.18
np = 512     # number of pixels across for f-fdot image (not numpy!)
z = 10.0     # average fourier f-dot
w = -40.0    # fourier freq double deriv
noise = 0.0
noise = 1.0 * norm(N)

us = num.arange(N, dtype=num.float64) / N  # normalized time coordinate
r0 = r - 0.5 * z + w / 12.0  # Make symmetric for all z and w
z0 = z - 0.5 * w
phss = 2.0 * num.pi * (us * (us * (us * w / 6.0 + z0 / 2.0) + r0))
ft = presto.rfft(num.cos(phss) + noise)
ffdot = presto.ffdot_plane(ft, rint - np / 2 * dr, dr, np,
                           0.0 - np / 2 * dz, dz, np)
pffdot = presto.spectralpower(ffdot.flat)
theo_max_pow = N**2.0 / 4.0
frp = max(pffdot) / theo_max_pow  # Fraction of recovered power
print("Fraction of recovered signal power = %f" % frp)
a = time.perf_counter()  # time.clock() was removed in Python 3.8
[maxpow, rmax, zmax, rd] = presto.maximize_rz(ft, r + norm(1)[0] / 5.0,
                                              z + norm(1)[0], norm=1.0)
print("Time for rz:", time.perf_counter() - a)
print(r, rmax, z, zmax, theo_max_pow, maxpow)
a = time.perf_counter()
[maxpow, rmax, zmax, wmax, rd] = presto.maximize_rzw(ft, r + norm(1)[0] / 5.0,
                                                     z + norm(1)[0],
                                                     w + norm(1)[0] * 5.0,
                                                     norm=1.0)
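# Where theo_max_pow = N**2/4 comes from (my own sketch with plain
# NumPy, not PRESTO's rfft): an unnormalized real FFT of a unit-amplitude
# cosine at an integer bin has amplitude N/2 there, so the peak power of
# a perfectly recovered signal is (N/2)**2 = N**2/4.
def _demo_theo_max_pow():
    import numpy as npy
    N = 1024
    t = npy.arange(N) / N
    spec = npy.fft.rfft(npy.cos(2 * npy.pi * 50 * t))
    assert npy.isclose(npy.abs(spec[50])**2, N**2 / 4.0)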