def makesendbufs(wmap, destmap, nmax=10000):
    '''
    Given a WaveformMap wmap and a destmap, as produced by keyroute, that
    maps ranks in an MPI communicator to sets of keys in wmap that should
    be sent to that rank, prepare and return a map from destination ranks
    to a list of BytesIO buffers that each hold a serialized
    representation of a subset (of at most nmax Waveforms) of wmap to be
    sent to that rank.

    Keys listed in destmap that are not present in wmap are ignored. Ranks
    with no keys to send do not appear in the returned map.

    A ValueError is raised if nmax is less than 1, because a non-positive
    chunk size can never exhaust the list of keys to send.
    '''
    if nmax < 1:
        raise ValueError('Argument nmax must be a positive integer')

    # Assign the buffers to target ranks
    buffers = defaultdict(list)

    for rank, rkeys in destmap.items():
        # Only keys actually held in wmap can be serialized
        remaining = list(rkeys.intersection(wmap))

        # Walk the key list in chunks of at most nmax keys
        for first in range(0, len(remaining), nmax):
            chunk = remaining[first:first + nmax]
            # Build a submap to serialize
            rmap = WaveformMap((k, wmap[k]) for k in chunk)

            # Serialize to a BytesIO stream
            bstr = io.BytesIO()
            rmap.store(bstr)

            # Append the buffer to the map
            buffers[rank].append(bstr)

    return buffers
def getwavegrps(infiles, nsamp=None):
    '''
    For a sequence infiles of input WaveformMap files, prepare a mapping
    from transmit-receive pairs to a list of Waveform objects representing
    backscatter waves observed at the pair.

    If the same WaveformMap key is duplicated in multiple input files, the
    list corresponding to that key will contain each Waveform in an order
    that matches the lexicographical ordering of the inputs.

    If nsamp is provided (and nonzero), the nsamp property of each loaded
    WaveformMap will be overridden with that value.

    Only keys whose Waveform lists have a length that matches that of the
    longest Waveform list will be included. If no waveforms are found at
    all (e.g., infiles is empty), an empty map is returned.
    '''
    wavegrps = defaultdict(list)

    # Sorted traversal fixes the order of waveforms within each group
    for infile in sorted(infiles):
        wmap = WaveformMap.load(infile, dtype='float64')
        if nsamp:
            wmap.nsamp = nsamp
        for (t, r), wave in wmap.items():
            wavegrps[t, r].append(wave)

    # Filter the groups to exclude short lists; default guards empty input
    maxlen = max((len(w) for w in wavegrps.values()), default=0)
    return {k: v for k, v in wavegrps.items() if len(v) == maxlen}
def loadlocalmaps(infiles, windower, *args, **kwargs):
    '''
    Invoke findmaps(infiles, *args, **kwargs) to identify a map from file
    names for WaveformMap serializations to desired keys for that file,
    then read each file and collect, in a single WaveformMap, the records
    whose keys were selected for that file.

    If windower is truthy, it should be a callable that will be applied to
    each selected Waveform; its return value is stored in place of the
    original Waveform.
    '''
    local = WaveformMap()
    selected = findmaps(infiles, *args, **kwargs)

    for fname, wanted in selected.items():
        # Stream records from the file, keeping only locally assigned keys
        for key, wave in WaveformMap.generate(fname):
            if key in wanted:
                local[key] = windower(wave) if windower else wave

    return local
def procmessages(sendreqs, recvreqs, recvbufs):
    '''
    Enter a loop to process incoming messages and close out pending sends,
    yielding whatever WaveformMap.generate produces for each received
    buffer (per the callers here, (t, r) keys paired with Waveform
    records) as messages arrive.

    The arguments sendreqs and recvreqs are, respectively, lists of send
    and receive requests as prepared by postmessages. The argument
    recvbufs is a map from source ranks to lists of BytesIO buffers that
    will be populated with the incoming messages associated with recvreqs;
    the message tag indexes the list for the source rank.

    No action is taken when send requests are ready, except to wait for
    their completion. A ValueError is raised if the MPI layer reports a
    request index outside of the combined request list.
    '''
    # Receives come first, so indices below this count are receives
    nrecvs = len(recvreqs)
    pending = recvreqs + sendreqs

    status = MPI.Status()

    while True:
        # Block until some request completes
        idx = MPI.Request.Waitany(pending, status)

        # No active requests remain
        if idx == MPI.UNDEFINED:
            return

        if idx < 0 or idx >= len(pending):
            raise ValueError(f'Unexpected MPI request index {idx}')

        # Completed sends need no further handling
        if idx >= nrecvs:
            continue

        # Locate the buffer for this message by source rank and tag
        stream = recvbufs[status.source][status.tag]
        stream.seek(0)

        # Yield the keys and waveforms in turn
        yield from WaveformMap.generate(stream)

        # Free buffer by closing the stream
        stream.close()
def findmaps(infiles, start=0, stride=1):
    '''
    Scan each WaveformMap file named in infiles to identify, for each
    file, the set of all keys in that file.

    If (start, stride) is other than (0, 1), the keys in each file will be
    pared to only sorted(keys)[start::stride].

    A map from file names to the (optionally strided) set of keys for each
    file is returned. Files that contribute no keys are omitted.
    '''
    strided = (start, stride) != (0, 1)
    pairmaps = {}

    for fname in infiles:
        # Only keys are needed, not the waveform records
        keygen = WaveformMap.generate(fname, keys_only=True)

        if strided:
            # Striding requires a well-defined key order
            keyset = set(sorted(keygen)[start::stride])
        else:
            keyset = set(keygen)

        if keyset:
            pairmaps[fname] = keyset

    return pairmaps
# Video mode renders grouped waveforms to a video file; otherwise all
# inputs are merged into a single WaveformMap.
if vidmode:
    # Load the backscatter waves in groups by element
    wavegrps = getwavegrps(args.inputs, args.nsamp)
    if args.atimes and not args.skip_alignment:
        # Shift waveforms if arrival times are provided
        wavegrps = shiftgrps(wavegrps, args.atimes, args.suppress)
        # Strip out the subsequent (realigned) times
        # (only the first time listed for each key is kept)
        args.atimes = {k: [v[0]] for k, v in args.atimes.items()}
        print('Shifted waveform groups')
    print('Storing waveform video to file', args.output)
    plotframes(args.output, wavegrps, args.atimes, args.window, args.equalize, args.thresh, args.bitrate, args.one_sided)
else:
    # Load the waveforms
    # (later inputs overwrite earlier ones on duplicate keys, assuming
    # WaveformMap.update behaves like dict.update -- TODO confirm)
    waves = WaveformMap()
    for inf in args.inputs:
        wm = WaveformMap.load(inf, dtype='float64')
        if args.nsamp:
            wm.nsamp = args.nsamp
        waves.update(wm)
    # There is no mean arrival time unless arrival times are provided
    mtime = None
    if args.atimes:
        # Find the mean arrival time for all waveforms
        celts = set(waves).intersection(args.atimes)
        print(f'{len(celts)} waveforms have associated arrival times')
        # NOTE(review): if celts is empty, np.mean yields nan and int()
        # raises ValueError -- confirm whether that case can occur
        mtime = int(np.mean([args.atimes[c] for c in celts]))
        if args.suppress:
            print('Will suppress unaligned waveforms')
# Post receives before sends, then drain all requests
recvreqs = postmessages(recvbufs, send=False)
sendreqs = postmessages(sendbufs, send=True)
# Outbound buffers are captured by requests and no longer needed
del sendbufs
# Process the messages, adding waveforms to the local map
printroot(grank, 'Collecting incoming waveforms...')
wmap.update(procmessages(sendreqs, recvreqs, recvbufs))
printroot(grank, f'Final size of local map at rank {grank} is {len(wmap)}')
# NOTE(review): presumably a sum reduction to the root rank (the mpi4py
# defaults) -- confirm
gnsize = MPI.COMM_WORLD.reduce(len(wmap))
printroot(grank, f'{gnsize} waveforms scattered globally')
# Build an output map
omap = WaveformMap()
# Consume the local map, pairing each (t, r) with its reciprocal (r, t)
while wmap:
    (t, r), left = wmap.popitem()
    try:
        right = wmap.pop((r, t))
    except KeyError:
        # No reciprocal partner remains, so this waveform is dropped
        continue
    # Averaged pairs are keyed with the smaller index first
    omap[min(t, r), max(t, r)] = pairavg(left, right, args.osamp, args.clip)
gosize = MPI.COMM_WORLD.reduce(len(omap))
printroot(grank, f'{gosize} reciprocal pairs averaged globally')
# Write the output, serializing within local communicators
for i in range(lsize):
    if i == lrank:
        # append=i is falsy only for local rank 0; presumably that rank
        # creates the file and the others append -- TODO confirm
        omap.store(args.output, append=i)
def fhfft(infile, outfile, groupmap, **kwargs):
    '''
    For a real WaveformSet file infile, perform Hadamard decoding and then
    a DFT of the temporal samples. The Hadamard decoding follows the
    grouping configuration stored in groupmap, a map

        (element index) -> (local Hadamard index, group number)

    that defines Hadamard groups and must agree with the local group
    configuration represented in the input. The resulting transformed
    records will be stored in the output outfile. The nature of outfile
    depends on the optional argument trmap (see below).

    If trmap is not provided, all records will be written as a binary
    blob; the outfile should be a single string providing the location of
    the output. The output will have shape Ns x Nt x Nr, where Ns is the
    number of output samples per waveform (as governed by the spectral or
    temporal windows applied), Nt is the number of input transmit
    channels, and Nr is the number of input receive channels.

    If trmap is provided, outfile should be a one-to-one map from the keys
    of trmap to output files. A WaveformMap object will be created for
    each key in trmap and stored at the location indicated by the
    corresponding value in outfile.

    Output file(s) will be created or truncated.

    Any TGC parameters in the input, accessible as wset.context['tgc'],
    will be used to adjust the amplitudes of the waveforms prior to
    applying Hadamard and Fourier transforms.

    The kwargs contain optional values or default overrides:

    * freqs (default: None): When not None, a sequence (start, end) to be
      passed as slice(start, end) to bandpass filter the input after
      Hadamard decoding.

    * rolloff (default: None): When not None, an integer that defines the
      half-width of a Hann window that rolls off the bandpass filter
      specified in freqs. Ignored if freqs is not provided.

    * nsamp (default: None): The length of the time window over which
      waveforms are considered (and DFTs are performed), starting from
      global time 0 (i.e., without consideration for input F2C). If None,
      the value of nsamp in the input is used.

      ** NOTE: Because the time window always starts at global time 0, a
      waveform with a data window (start, length) will be cropped when
      (f2c + start + length) > nsamp, even if nsamp is the value encoded
      in the file.

    * tgcsamps (default: 16 [for integer datatypes] or 0 [else]): The
      number of temporal samples to which a single TGC parameter applies.
      Signals will be scaled by an appropriate section of the multiplier

        mpy = (invtgc[:,np.newaxis] *
            np.ones((ntgc, tgcsamps), dtype=np.float32)).ravel('C'),

      where the values invtgc = 10.**(-wset.context['tgc'] / 20.) and
      ntgc = len(wset.context['tgc']). The multiplier mpy is defined over
      a window that starts at file sample 0 (global time wset.f2c).

      Set tgcsamps to 0 (or None) to disable compensation. If the
      WaveformSet includes TGC parameters and tgcsamps is a positive
      integer, then len(mpy) must be at least long enough to encompass all
      data windows encoded in the file.

    * tgcmap (default: None): If provided, should be a two-column, rank-2
      Numpy array (or compatible sequence) that relates nominal gains in
      column 0 to actual gains in column 1. The rows of the array will be
      used as control points in a piecewise linear interpolation (using
      numpy.interp) that will map TGC parameters specified in the
      WaveformSet file to actual gains. In other words, the TGC values
      described above will be replaced with

        tgc = np.interp(tgc, tgcmap[:,0], tgcmap[:,1])

      whenever tgcmap is provided.

    * tdout (default: False): Set to True to output time-domain waveforms
      rather than spectral samples. Preserves input acquisition windows.

    * signs (default: None): When not None, should be a sequence whose
      length is the Hadamard group size (wset.txgrps[1]) that specifies a
      1 for any local Hadamard index (corresponding to lines in the file)
      that should be negated, and 0 anywhere else. Ignored when an FHT is
      not performed.

    * trmap (default: None): If provided, must be a map from a label
      (referencing an output location in the map outfile) to a map from
      receive indices to lists of transmit indices that, together,
      identify transmit-receive pairs to extract from the input.

    * start (default: 0) and stride (default: 1): For an input WaveformSet
      wset, process receive channels in wset.rxidx[start::stride].

    * lock (default: None): If not None, it should be a context manager
      that is invoked to serialize writes to output. When omitted or None,
      a new multiprocessing.Lock() is used.

    * event (default: None): Only used when trmap is not provided. If not
      None, event.set() and event.wait() are called to ensure the output
      header is written to the binary-blob output before records are
      appended. The value event.is_set() should be False prior to
      execution. When omitted or None, a new multiprocessing.Event() is
      used.

    A TypeError is raised for unrecognized keyword arguments. A ValueError
    is raised for an invalid rolloff, a missing or invalid Tx-group
    configuration, a malformed sign list, incomplete or improperly indexed
    FHT groups, or a TGC curve that does not span a channel's data window.
    '''
    # Override acquisition window, if desired
    nsamp = kwargs.pop('nsamp', None)

    # Grab synchronization mechanisms; an explicit None means "use default"
    lock = kwargs.pop('lock', None)
    if lock is None:
        lock = multiprocessing.Lock()

    event = kwargs.pop('event', None)
    if event is None:
        event = multiprocessing.Event()

    # Grab FFT and FHT switches and options
    tdout = kwargs.pop('tdout', False)
    freqs = kwargs.pop('freqs', None)
    rolloff = kwargs.pop('rolloff', None)
    # A forward FFT is needed for spectral output or for bandpass filtering
    dofft = (freqs is not None) or not tdout

    if freqs is not None:
        flo, fhi = freqs
        if rolloff and not 0 < rolloff < (fhi - flo) // 2:
            raise ValueError(
                'Rolloff must be None or less than half bandwidth')

    # Grab striding information
    start = kwargs.pop('start', 0)
    stride = kwargs.pop('stride', 1)

    # Grab sign map information
    signs = kwargs.pop('signs', None)

    # Grab the number of samples per TGC value and an optional gain map
    tgcsamps = kwargs.pop('tgcsamps', None)
    tgcmap = kwargs.pop('tgcmap', None)

    trmap = kwargs.pop('trmap', None)

    if len(kwargs):
        raise TypeError(f"Unrecognized keyword '{next(iter(kwargs))}'")

    # Open the input and create a corresponding output
    wset = WaveformSet.load(infile)

    # Pull default sample count from input file
    if nsamp is None:
        nsamp = wset.nsamp
    elif wset.nsamp < nsamp:
        wset.nsamp = nsamp

    # Handle TGC compensation if necessary
    try:
        tgc = np.asarray(wset.context['tgc'], dtype=np.float32)
    except (KeyError, AttributeError):
        tgc = np.array([], dtype=np.float32)

    if tgcmap is not None:
        # Make sure that the TGC map is sorted and interpolate
        tgx, tgy = zip(*sorted((k, v) for k, v in tgcmap))
        # TGC curves are always float32, regardless of tgcmap types
        tgc = np.interp(tgc, tgx, tgy).astype(np.float32)

    # Pick a suitable default value for tgcsamps
    if tgcsamps is None:
        tgcsamps = 16 if np.issubdtype(wset.dtype, np.integer) else 0

    # Linearize, invert, and expand the TGC curves
    # (the result is empty when there are no TGC values or tgcsamps is 0)
    tgc = ((10.**(-tgc[:, np.newaxis] / 20.) * np.ones(
        (len(tgc), tgcsamps), dtype=np.float32))).ravel('C')

    # Figure out the data type of compensated waveforms
    if len(tgc):
        itype = np.dtype(wset.dtype.type(0) * tgc.dtype.type(0))
    else:
        itype = wset.dtype

    # Make sure that the data type is always floating-point
    if not np.issubdtype(itype, np.floating):
        itype = np.dtype('float64')

    # Create a WaveformSet object to hold the ungrouped data
    ftype = _r2c_datatype(itype)
    otype = ftype if not tdout else itype

    # Make sure the WaveformSet has a local configuration
    try:
        gcount, gsize = wset.txgrps
    except TypeError:
        raise ValueError('A valid Tx-group configuration is required')

    # The bit trick gsize & (gsize - 1) is zero only for powers of two
    if gsize < 1 or (gsize & (gsize - 1)):
        raise ValueError('Hadamard length must be a positive power of 2')

    # Validate local portion of the group map and assign
    wset.groupmap = groupmap

    if signs is not None:
        # Convert 0/1 negation flags to +1/-1 multipliers of the right type
        signs = np.asarray([1 - 2 * s for s in signs], dtype=itype)
        if signs.ndim != 1 or len(signs) != gsize:
            msg = f'Sign list must have shape ({wset.txgrps[1]},)'
            raise ValueError(msg)

    # Identify all FHTs represented by stored transmission indices
    fhts = {}
    for i in wset.txidx:
        g, l = i // gsize, i % gsize
        try:
            fhts[g].append(l)
        except KeyError:
            fhts[g] = [l]

    # Verify that all FHTs are complete
    for g, ll in fhts.items():
        if len(ll) != gsize:
            raise ValueError(f'FHT group {g} is incomplete')
        if any(i != j for i, j in enumerate(sorted(ll))):
            raise ValueError(f'FHT group {g} has improper local indices')

    # Map each FHT group to a list of row indices for the FHT
    # and each element corresponding to an FHT output to row indices
    gidx = lambda l, g: g * gsize + l
    fhts = {g: [wset.tx2row(gidx(l, g)) for l in range(gsize)] for g in fhts}
    invgroups = {(l, g): i for i, (l, g) in wset.groupmap.items()}
    el2row = {
        invgroups[l, g]: wset.tx2row(gidx(l, g))
        for g in fhts for l in range(gsize)
    }

    # Create intermediate (FHT) and output (FHFFT) arrays
    # FFT axis is contiguous for FFT performance
    b = pyfftw.empty_aligned((wset.ntx, nsamp), dtype=itype, order='C')

    if dofft:
        # Create FFT output and a plan
        cdim = (wset.ntx, nsamp // 2 + 1)
        c = pyfftw.empty_aligned(cdim, dtype=ftype, order='C')
        fwdfft = pyfftw.FFTW(b, c, axes=(1, ), direction='FFTW_FORWARD')

        # Create an inverse FFT plan for time-domain output
        if tdout:
            invfft = pyfftw.FFTW(c, b, axes=(1, ), direction='FFTW_BACKWARD')

        # Find the spectral window of interest
        fswin = specwin(cdim[1], freqs)

        # Try to build bandpass tails
        if rolloff:
            tails = np.hanning(2 * int(rolloff))
        else:
            tails = np.array([])

    if trmap:
        # Identify the subset of receive channels needed
        allrx = reduce(set.union, (trm.keys() for trm in trmap.values()), set())
        rxneeded = sorted(allrx.intersection(wset.rxidx))[start::stride]
    else:
        rxneeded = wset.rxidx[start::stride]

        # In blob mode, the first write must create a header
        with lock:
            if not event.is_set():
                # Create a sliced binary matrix output
                windim = (nsamp if tdout else fswin.length, wset.ntx, wset.nrx)
                mio.Slicer(outfile, dtype=otype, trunc=True, dim=windim)
                event.set()

        # Ensure the output header has been written
        event.wait()

        # Map receive channels to rows (slabs) in the output
        rx2slab = {i: j for j, i in enumerate(sorted(wset.rxidx))}
        # Map transmit channels to decoded FHT rows
        outrows = [r for (e, r) in sorted(el2row.items())]

        outbin = mio.Slicer(outfile)

    for rxc in rxneeded:
        # Find the input window relative to 0 f2c
        iwin = wset.getheader(rxc).win.shift(wset.f2c)
        owin = (0, nsamp)

        try:
            # Find overlap of global input and output windows
            ostart, istart, dlength = cutil.overlap(owin, iwin)
        except TypeError:
            # Default to 0-length windows at start of acquisition
            iwin = Window(0, 0, nonneg=True)
            owin = Window(0, 0, nonneg=True)
        else:
            # Convert input and output windows from global f2c to file f2c
            iwin = Window(istart, dlength, nonneg=True)
            owin = Window(ostart, dlength, nonneg=True)

        # Read the data over the input window
        data = wset.getrecord(rxc, window=iwin)[1]

        # Clear the data array
        b[:, :] = 0.
        ws, we = owin.start, owin.end

        if iwin.length and gsize > 1:
            # Perform grouped Hadamard transforms with optional sign flips
            for rows in fhts.values():
                # Ensure FHT axis is contiguous for performance
                dblk = np.asfortranarray(data[rows, :])
                b[rows, ws:we] = fwht(dblk, axes=0) / gsize
                if signs is not None:
                    b[rows, ws:we] *= signs[:, np.newaxis]
        else:
            b[:, ws:we] = data

        # Time-gain compensation, if necessary
        if len(tgc) and iwin.length:
            twin = (0, len(tgc))
            try:
                tstart, istart, dlength = cutil.overlap(twin, iwin)
                # The TGC curve must cover the entire data window
                if dlength != iwin.length:
                    raise ValueError
            except (TypeError, ValueError):
                raise ValueError(
                    f'TGC curve does not encompass data for channel {rxc}')
            b[:, ws:we] *= tgc[np.newaxis, tstart:tstart + dlength]

        if dofft:
            fwdfft()

            # Suppress content out of the band
            c[:, :fswin.start] = 0.
            c[:, fswin.end:] = 0.

            # Bandpass filter the spectral samples
            if len(tails) > 0:
                ltails = len(tails) // 2
                c[:, fswin.start:fswin.start + ltails] *= tails[np.newaxis, :ltails]
                c[:, fswin.end - ltails:fswin.end] *= tails[np.newaxis, -ltails:]

            # Revert to time-domain representation if necessary
            if tdout:
                invfft()

        if not trmap:
            # Write the binary blob for this receive channel
            orow = rx2slab[rxc]
            with lock:
                if tdout:
                    outbin[orow] = b[outrows, :].T
                else:
                    outbin[orow] = c[outrows, fswin.start:fswin.end].T
            # Nothing more to do in blob mode
            continue

        # Slice desired range from output data
        if tdout:
            dblock = b[:, ws:we]
            dstart = ws
        else:
            dblock = c[:, fswin.start:fswin.end]
            dstart = fswin.start

        for label, trm in trmap.items():
            # Pull tx list for this tier and rx channel, if possible
            try:
                tl = trm[rxc]
            except KeyError:
                tl = []

            if not len(tl):
                continue

            # Collect all transmissions for this rx channel
            wmap = WaveformMap()
            for t in tl:
                # Make sure transmission is represented in output
                try:
                    row = el2row[t]
                except KeyError:
                    continue

                wave = Waveform(nsamp, dblock[row], dstart)
                wmap[t, rxc] = wave

            # Flush the waveform map to disk
            with lock:
                wmap.store(outfile[label], append=True)
parser.add_argument('inputs', type=str, nargs='+', help='Input WaveformMap files from which to extract')
args = parser.parse_args(sys.argv[1:])
# Try to read all input WaveformMap files
infiles = matchfiles(args.inputs)
# Read a defined receive-to-transmit-list map
if args.trmap:
    args.trmap = loadkeymat(args.trmap, scalar=False)
# At first, clobber the output
append = False
for infile in infiles:
    wmap = WaveformMap.load(infile)
    # Build the appropriate subset of the WaveformMap
    if not args.backscatter:
        wvs = trextract(wmap, args.trmap, args.random)
    else:
        # Backscatter records are those whose key has equal first and
        # second entries
        wvs = ((k, v) for k, v in wmap.items() if k[0] == k[1])
    omap = WaveformMap(wvs)
    if args.output:
        # Save to common output and switch to append mode
        omap.store(args.output, compression=args.compression, append=append)
        append = True
    else:
        # Otherwise write one output per input, named from the input's
        # base name (extension removed) with 'extract.wmz' appended
        output = os.path.splitext(infile)[0] + 'extract.wmz'
        omap.store(output, compression=args.compression, append=False)
def calcdelays(datafile, reffile, osamp=1, rank=0, grpsize=1, **kwargs):
    '''
    Given a datafile containing a habis.sigtools.WaveformMap, find arrival
    times using cross-correlation or IMER for waveforms returned by

      wavegen(data, rank=rank, grpsize=grpsize, **exargs),

    where data is the WaveformMap encoded in datafile and exargs is a
    subset of kwargs as described below.

    For arrival times determined from cross-correlation, a reference
    waveform (as habis.sigtools.Waveform) is read from reffile. For IMER
    arrival times, reffile is ignored.

    The return value is a 2-tuple containing, first, a dictionary that
    maps a (t,r) transmit-receive index pair to delay in samples; and,
    second, a dictionary that maps stat groups to counts of waveforms that
    match the stats.

    Optional keyword arguments include:

    * flipref: A Boolean (default: False) that, when True, causes the
      reference waveform to be negated when read.

    * nsamp: Override data.nsamp. Useful mainly for bandpass filtering.

    * negcorr: A Boolean (default: False) passed to Waveform.delay as the
      'negcorr' argument to consider negative cross-correlation.

    * signsquare: Square the waveform and reference amplitudes
      (multiplying each signal by its absolute value to preserve signs)
      to better emphasize peaks in the cross-correlation. The squaring is
      done right after any bandpass filtering, so other parameters that
      are influenced by amplitude (e.g., minsnr, thresholds in peaks)
      should be altered to account for the squared amplitudes.

    * minsnr: A sequence (mindb, noisewin) used to define the minimum
      acceptable SNR in dB (mindb) by comparing the peak signal amplitude
      to the minimum standard deviation over a sliding window of width
      noisewin. SNR for each signal is calculated after application of an
      optional window. Delays will not be calculated for signals that
      fail to exceed the minimum threshold.

    * denoise: If not None, a dictionary suitable for passing as keyword
      arguments (**denoise) to Waveform.denoise to use CFAR rejection of
      the Gabor spectrogram to isolate the signal. Denoising is done
      after minimum-SNR rejection to avoid too many false matches with
      very-low-noise signals.

    * peaks: A dictionary suitable for passing as keyword arguments
      (**peaks) to the isolatepeak function, excluding the first three
      arguments.

      *** NOTE: peak windowing is done after overall windowing and after
      possible exclusion by minsnr. ***

    * delaycache: A map from transmit-receive element pairs (t, r) to a
      precomputed delay d. If a value exists for a given pair (t, r) in
      the WaveformMap and the element map, the precomputed value will be
      used in favor of explicit computation.

    * queue: If not None, the return values are passed as an argument to
      queue.put().

    * eleak: If not None, a floating-point value in the range [0, 1) that
      specifies the maximum permissible fraction of the total signal
      energy that may arrive before identified arrival times. Any
      waveform for which the fraction of total energy arriving before the
      arrival time exceeds eleak will be rejected as unacceptable.
      Estimates of energy leaks ignore any fractional parts of arrival
      times. Energy leaks are estimated after any bandpass filtering or
      windowing. Estimates never consider peak isolation. A value of 0
      disables the check, just as None does.

    * imer: A dictionary to provide all but the first argument of
      getimertime. If this is provided, getimertime will be used instead
      of (optional) peak isolation and cross-correlation to determine an
      arrival time.

    * elements: If not None, an N-by-3 array or a map from element indices
      to coordinates. If wavegen returns a neighborhood of more than one
      transmit-receive pair for any arrival time, the element coordinates
      will be used to find an optimal (in the least-squares sense)
      slowness to predict arrivals observed in the neighborhood.

      If an arrival-time measurement for the "key" pair in a measurement
      neighborhood is available and average slowness imputed by this
      arrival time falls within 1.5 IQR of the average slowness values
      for all pairs in the neighborhood, or if the neighborhood consists
      of only the key measurement pair, the arrival time for the "key"
      pair is used without modification.

      If the arrival time for a key pair is missing from the
      neighborhood, or falls outside of 1.5 IQR, the arrival time for the
      key pair will be the optimum slowness value for the neighborhood
      multiplied by the propagation distance for the pair.

      NOTE(review): the code below does not impute a replacement time for
      such pairs; it only counts them as 'outlier' and records no result.
      Confirm which behavior is intended.

      Element coordinates are required if wavegen returns neighborhoods of
      more than one member.

    Any unspecified keyword arguments are passed to wavegen.

    A ValueError is raised if neither reffile nor imer is specified, if
    eleak is outside [0, 1), or if any neighborhood involves a backscatter
    pair (t == r). A TypeError is raised if neighborhoods were produced
    but elements is None.
    '''
    # Read the data and reference
    data = WaveformMap.load(datafile)

    # Pull a copy of the IMER configuration, if it exists
    imer = dict(kwargs.pop('imer', ()))

    # Read the reference if IMER times are not desired
    if not imer:
        if reffile is None:
            raise ValueError('Must specify reffile or imer')
        ref = Waveform.fromfile(reffile)
    else:
        ref = None

    # Negate the reference, if appropriate
    if kwargs.pop('flipref', False) and ref is not None:
        ref = -ref

    # Unpack the signsquare argument and flip the reference if necessary
    signsquare = kwargs.pop('signsquare', False)
    if signsquare and ref is not None:
        ref = ref.signsquare()

    # Override the sample count, if desired
    try:
        nsamp = kwargs.pop('nsamp')
    except KeyError:
        pass
    else:
        data.nsamp = nsamp

    # Determine if an energy "leak" threshold is desired;
    # an explicit None is equivalent to omitting the argument
    eleak = kwargs.pop('eleak', None)
    if eleak is not None:
        eleak = float(eleak)
        if not 0 <= eleak < 1:
            raise ValueError('Argument eleak must be in range [0, 1)')

    # Unpack minimum SNR requirements
    minsnr, noisewin = kwargs.pop('minsnr', (None, None))

    # Pull the optional peak search criteria
    peaks = dict(kwargs.pop('peaks', ()))

    # Pull the optional denoising criteria
    denoise = dict(kwargs.pop('denoise', ()))

    # Determine whether to allow negative correlations
    negcorr = kwargs.pop('negcorr', False)

    # Grab an optional delay cache
    delaycache = kwargs.pop('delaycache', {})

    # Grab an optional result queue
    queue = kwargs.pop('queue', None)

    # Element coordinates, if required
    elements = kwargs.pop('elements', None)

    # Pre-populate cached values
    result = {k: delaycache[k] for k in set(data).intersection(delaycache)}
    # Remove the cached waveforms from the set
    for k in result:
        data.pop(k, None)

    # Only keep a local portion of cached values
    result = {k: result[k] for k in sorted(result)[rank::grpsize]}

    wavestats = defaultdict(int)
    wavestats['cached'] = len(result)

    # Maps each neighbor pair to the delays measured for keys that list it
    grpdelays = defaultdict(dict)

    # Process waveforms (possibly averages) as generated
    for key, sig, nbrs in wavegen(data, rank=rank, grpsize=grpsize, **kwargs):
        # Square the signal if desired
        if signsquare:
            sig = sig.signsquare()

        if minsnr is not None and noisewin is not None:
            if sig.snr(noisewin) < minsnr:
                wavestats['low-snr'] += 1
                continue

        if denoise:
            sig = sig.denoise(**denoise)

        # Calculate cumulative energy in unwindowed waveform
        if eleak:
            cenergy = np.cumsum(sig.data**2)

        if imer:
            # Compute IMER time
            try:
                dl = getimertime(sig, osamp=osamp, **imer)
            except IndexError:
                wavestats['failed-IMER'] += 1
                continue
        else:
            if peaks:
                try:
                    sig = isolatepeak(sig, key, **peaks)
                except ValueError:
                    wavestats['missing-peak'] += 1
                    continue

            # Compute and record the delay
            dl = sig.delay(ref, osamp=osamp, negcorr=negcorr)
            if negcorr:
                # With negcorr, the delay comes back as a pair whose second
                # entry is negative for negatively correlated signals
                if dl[1] < 0:
                    wavestats['negative-correlated'] += 1
                dl = dl[0]

        if eleak:
            # Evaluate leaked energy
            ssamp = int(dl) - sig.datawin.start - 1
            if not 0 <= ssamp < len(cenergy):
                wavestats['out-of-bounds'] += 1
                continue
            elif cenergy[ssamp] >= eleak * cenergy[-1]:
                wavestats['leaky'] += 1
                continue

        if len(nbrs) < 2:
            # If the element is its own neighborhood, just copy result
            if key in nbrs:
                wavestats['sole-valid'] += 1
                result[key] = dl
            else:
                wavestats['invalid-neighborhood'] += 1
        else:
            # Results will be optimized from groups of delays
            for nbr in nbrs:
                grpdelays[nbr][key] = dl

    if grpdelays and elements is None:
        raise TypeError('Cannot have neighborhoods when elements is None')

    for key, grp in grpdelays.items():
        if key[0] == key[1] or any(t == r for t, r in grp):
            raise ValueError('Backscatter neighborhoods not supported')

        slw = {}
        try:
            # Find average slowness for each pair in the neighborhood
            for (t, r), dl in grp.items():
                slw[t, r] = dl / norm(elements[t] - elements[r])
        except (KeyError, IndexError):
            # Coordinates do not exist for some element in the group
            wavestats['unknown-pair'] += 1
            continue

        # Eliminate outliers based on slowness; discard slowness values
        slw = set(stats.mask_outliers(slw))

        if key in slw:
            result[key] = grp[key]
            wavestats['valid-in-neighborhood'] += 1
        else:
            wavestats['outlier'] += 1

    # Deliver results through the queue only when one was provided
    if queue is not None:
        queue.put((result, wavestats))

    return result, wavestats