def test_processing_multiprocessing(example_data_set):
    """
    Round-trip check for ``ASDFDataSet.process()``.

    A processing function that returns its stream unchanged is applied to the
    example file, the result is written to ``output.h5`` inside the fixture's
    temporary directory, and the written file must compare equal to the
    input data set.
    """
    def null_processing(st, inv):
        # Identity processing: the stream is handed back untouched.
        return st

    source_path = example_data_set.filename
    source = ASDFDataSet(source_path)
    output_filename = os.path.join(example_data_set.tmpdir, "output.h5")

    # Map the tag onto itself so input and output stay comparable.
    tag_map = {"raw_recording": "raw_recording"}
    source.process(null_processing, output_filename, tag_map)
    # Drop the handle before the input file is opened again below.
    del source

    reference = ASDFDataSet(source_path)
    processed = ASDFDataSet(output_filename)
    assert reference == processed
st.taper(max_percentage=0.05, type="hann") st.interpolate(sampling_rate=sampling_rate, starttime=starttime, npts=npts) station_latitude = inv[0][0].latitude station_longitude = inv[0][0].longitude _, baz, _ = gps2DistAzimuth(station_latitude, station_longitude, event_latitude, event_longitude) components = [tr.stats.channel[-1] for tr in st] if "N" in components and "E" in components: st.rotate(method="NE->RT", back_azimuth=baz) # Convert to single precision to save space. for tr in st: tr.data = np.require(tr.data, dtype="float32") return st tag_name = "preprocessed_%is_to_%is" % (int(min_period), int(max_period)) tag_map = { "raw_recording": tag_name } ds.process(process_function, tag_name + ".h5", tag_map=tag_map) # Important when running with MPI as it might otherwise not be able to finish. del ds
npts=npts) station_latitude = float(inv[0][0].latitude) station_longitude = float(inv[0][0].longitude) _, baz, _ = gps2DistAzimuth(station_latitude, station_longitude, event_latitude, event_longitude) components = [tr.stats.channel[-1] for tr in st] if "N" in components and "E" in components: st.rotate(method="NE->RT", back_azimuth=baz) # Convert to single precision to save space. for tr in st: tr.data = np.require(tr.data, dtype="float32") return st new_tag = "proc_obsd_%i_%i" % (int(min_period), int(max_period)) tag_map = {old_tag: new_tag} outputfn = eventname + "." + new_tag + ".h5" outputfn = os.path.join(outputdir, outputfn) if os.path.exists(outputfn): os.remove(outputfn) ds.process(process_function, outputfn, tag_map=tag_map) t2 = time.time() print "Elapsed time:", t2 - t1
def preprocessing_function_asdf(processing_info):
    """
    Preprocess the ``raw_recording`` waveforms of one ASDF file.

    The input file is opened read-only and every station is run through
    ``process_function`` via ``ASDFDataSet.process``. Per station the steps
    are: trim, stepwise decimation, detrend/taper, response removal to
    displacement, rotation of BH1/BH2 data to ZNE, two bandpass passes,
    Lanczos interpolation onto the requested time axis and a cast to
    float32. The result is written to ``<output>_tmp`` first and moved to
    the final output name once processing has finished.

    :param processing_info: Dictionary providing the keys
        ``asdf_input_filename``, ``asdf_output_filename``,
        ``preprocessing_tag``, ``npts``, ``dt``, ``minimum_period``,
        ``maximum_period`` and ``start_time_in_s``.
    :raises Exception: ``process_function`` raises a plain ``Exception``
        (chained to the original error) when the response removal or the
        rotation of a station fails.
    """

    def zerophase_chebychev_lowpass_filter(trace, freqmax):
        """
        Custom Chebychev type two zerophase lowpass filter useful for
        decimation filtering.

        This filter is stable up to a reduction in frequency with a factor
        of 10. If more reduction is desired, simply decimate in steps.

        Partly based on a filter in ObsPy.

        :param trace: The trace to be filtered.
        :param freqmax: The desired lowpass frequency.

        Will be replaced once ObsPy has a proper decimation filter.
        """
        # rp - maximum ripple of passband, rs - attenuation of stopband
        rp, rs, order = 1, 96, 1e99
        ws = freqmax / (trace.stats.sampling_rate * 0.5)  # stop band frequency
        wp = ws  # pass band frequency

        # Lower the pass band edge step by step until the required filter
        # order is at most 12.
        while True:
            if order <= 12:
                break
            wp *= 0.99
            order, wn = signal.cheb2ord(wp, ws, rp, rs, analog=0)

        b, a = signal.cheby2(order, rs, wn, btype="low", analog=0,
                             output="ba")

        # Apply twice to get rid of the phase distortion.
        trace.data = signal.filtfilt(b, a, trace.data)

    def station_failure_message(inv, action, error):
        """Build the error text for a station that could not be processed."""
        # The station is identified by the first channel id in the
        # inventory; its first two dot-separated fields are used as
        # network and station code.
        id_parts = inv.get_contents()["channels"][0].split(".", 2)
        net = id_parts[0]
        sta = id_parts[1]
        inf = processing_info["asdf_input_filename"]
        return (
            f"Station: {net}.{sta} could not be {action} with the help of"
            f" asdf file: '{inf}'. Due to: '{error!r}' "
            f"Will be skipped.")

    # =========================================================================
    # Read ASDF file
    # =========================================================================
    ds = ASDFDataSet(processing_info["asdf_input_filename"],
                     compression=None, mode="r")
    event = ds.events[0]

    # Get processing_info
    npts = processing_info["npts"]
    sampling_rate = 1.0 / processing_info["dt"]
    min_period = processing_info["minimum_period"]
    max_period = processing_info["maximum_period"]

    origin = event.preferred_origin() or event.origins[0]
    starttime = origin.time + processing_info["start_time_in_s"]
    endtime = starttime + processing_info["dt"] * (npts - 1)
    duration = endtime - starttime

    f2 = 0.9 / max_period
    f3 = 1.1 / min_period
    # Recommendations from the SAC manual.
    f1 = 0.5 * f2
    f4 = 2.0 * f3
    pre_filt = (f1, f2, f3, f4)

    def process_function(st, inv):
        for tr in st:
            # Trim to reduce processing costs
            tr.trim(starttime - 0.2 * duration, endtime + 0.2 * duration)

            # Decimation
            while True:
                # Decimate in steps (factor of at most 8 per pass) for
                # large sample rate reductions.
                decimation_factor = min(
                    int(processing_info["dt"] / tr.stats.delta), 8)
                if decimation_factor <= 1:
                    break
                new_nyquist = (tr.stats.sampling_rate / 2.0 /
                               float(decimation_factor))
                zerophase_chebychev_lowpass_filter(tr, new_nyquist)
                tr.decimate(factor=decimation_factor, no_filter=True)

        # Detrend and taper
        st.detrend("linear")
        st.detrend("demean")
        st.taper(max_percentage=0.05, type="hann")

        # Instrument correction
        try:
            st.attach_response(inv)
            st.remove_response(output="DISP", pre_filt=pre_filt,
                               zero_mean=False, taper=False)
        except Exception as e:
            # Chain explicitly so the original error is kept as the cause.
            raise Exception(
                station_failure_message(inv, "corrected", e)) from e

        # Rotate potential BHZ,BH1,BH2 data to BHZ,BHN,BHE
        if len(st) == 3 and any(
                tr.stats.channel in ["BH1", "BH2"] for tr in st):
            try:
                st._rotate_to_zne(inv)
            except Exception as e:
                raise Exception(
                    station_failure_message(inv, "rotated", e)) from e

        # Bandpass filtering. The detrend/taper/filter sequence is run
        # twice, exactly as in the original implementation.
        for _ in range(2):
            st.detrend("linear")
            st.detrend("demean")
            st.taper(0.05, type="cosine")
            st.filter(
                "bandpass",
                freqmin=1.0 / max_period,
                freqmax=1.0 / min_period,
                corners=3,
                zerophase=False,
            )

        # Sinc interpolation
        for tr in st:
            tr.data = np.require(tr.data, requirements="C")

        st.interpolate(
            sampling_rate=sampling_rate,
            method="lanczos",
            starttime=starttime,
            window="blackman",
            a=12,
            npts=npts,
        )

        # Convert to single precision to save space.
        for tr in st:
            tr.data = np.require(tr.data, dtype="float32", requirements="C")

        return st

    tag_name = processing_info["preprocessing_tag"]
    tag_map = {"raw_recording": tag_name}

    output_filename = processing_info["asdf_output_filename"]
    tmp_output = output_filename + "_tmp"
    # Remove a leftover temporary file before writing a new one.
    if os.path.exists(tmp_output):
        os.remove(tmp_output)
    ds.process(process_function, tmp_output, tag_map=tag_map)

    # Drop the input handle, then move the finished file into place.
    del ds
    shutil.move(tmp_output, output_filename)
def process_synt(asdf_fn, outputfn, filter_band, old_tag=None, new_tag=None):
    """
    Band-limit, resample and rotate the waveforms of an ASDF file.

    Every station is detrended and tapered, filtered with a frequency
    domain cosine taper (as done during a response removal, but without a
    response), interpolated onto a fixed time axis, rotated from NE to RT
    when both horizontal components are present, and cast to float32. The
    result is written to ``outputfn`` through ``ASDFDataSet.process``.

    :param asdf_fn: Filename of the ASDF file to read.
    :param outputfn: Filename of the ASDF file to write.
    :param filter_band: Sequence whose element 0 is the minimum period and
        element 1 the maximum period of the pass band.
    :param old_tag: Waveform tag to process (key of the tag map).
    :param new_tag: Tag under which the processed waveforms are stored.
    :raises ValueError: If the minimum period exceeds the maximum period.
    """
    # read in dataset
    ds = ASDFDataSet(asdf_fn)

    max_period = filter_band[1]
    min_period = filter_band[0]
    if min_period > max_period:
        raise ValueError("filter_band incorrect: min_period > max_period")

    # Corner frequencies of the frequency domain taper.
    f2 = 1.0 / max_period
    f3 = 1.0 / min_period
    f1 = 0.8 * f2
    f4 = 1.2 * f3
    pre_filt = (f1, f2, f3, f4)

    # read in event
    event = ds.events[0]
    origin = event.preferred_origin() or event.origins[0]
    event_latitude = origin.latitude
    event_longitude = origin.longitude
    event_time = origin.time

    # Figure out these parameters somehow!
    starttime = event_time
    npts = 3600
    sampling_rate = 1.0

    def process_function(st, inv):
        # Function-scope import, done once per call instead of per trace.
        from obspy.signal.util import _npts2nfft

        st.detrend("linear")
        st.detrend("demean")
        st.taper(max_percentage=0.05, type="hann")

        # Perform a frequency domain taper like during the response removal
        # just without an actual response...
        for tr in st:
            data = tr.data.astype(np.float64)
            # Remember the input length: the spectrum below has a different
            # length, and the padded samples must be cut off again after
            # the inverse transform.
            npts_in = len(data)

            # smart calculation of nfft dodging large primes
            nfft = _npts2nfft(npts_in)

            fy = 1.0 / (tr.stats.delta * 2.0)
            freqs = np.linspace(0, fy, nfft // 2 + 1)

            # Transform data to Frequency domain
            spectrum = np.fft.rfft(data, n=nfft)
            spectrum *= c_sac_taper(freqs, flimit=pre_filt)
            spectrum[-1] = abs(spectrum[-1]) + 0.0j
            # transform data back into the time domain and drop the
            # zero padding so the trace keeps its original length
            tr.data = np.fft.irfft(spectrum, n=nfft)[:npts_in]

        st.detrend("linear")
        st.detrend("demean")
        st.taper(max_percentage=0.05, type="hann")

        st.interpolate(sampling_rate=sampling_rate, starttime=starttime,
                       npts=npts)

        components = [tr.stats.channel[-1] for tr in st]
        if "N" in components and "E" in components:
            # Back azimuth computed from the coordinates of inv[0][0] and
            # the event origin.
            station_latitude = float(inv[0][0].latitude)
            station_longitude = float(inv[0][0].longitude)
            _, baz, _ = gps2DistAzimuth(station_latitude, station_longitude,
                                        event_latitude, event_longitude)
            st.rotate(method="NE->RT", back_azimuth=baz)

        # Convert to single precision to save space.
        for tr in st:
            tr.data = np.require(tr.data, dtype="float32")

        return st

    # NOTE(review): with the default old_tag/new_tag of None the tag map is
    # {None: None}; callers are presumably expected to pass both - confirm.
    tag_map = {old_tag: new_tag}

    # process
    ds.process(process_function, outputfn, tag_map=tag_map)
station_latitude = float(inv[0][0].latitude) station_longitude = float(inv[0][0].longitude) _, baz, _ = gps2DistAzimuth(station_latitude, station_longitude, event_latitude, event_longitude) components = [tr.stats.channel[-1] for tr in st] if "N" in components and "E" in components: st.rotate(method="NE->RT", back_azimuth=baz) # Convert to single precision to save space. for tr in st: tr.data = np.require(tr.data, dtype="float32") return st new_tag = "proc_obsd_%i_%i" % (int(min_period), int(max_period)) tag_map = { old_tag : new_tag } outputfn = eventname + "." + new_tag + ".h5" outputfn = os.path.join(outputdir, outputfn) if os.path.exists(outputfn): os.remove(outputfn) ds.process(process_function, outputfn, tag_map=tag_map) t2=time.time() print "Elapsed time:", t2-t1
def process_synt(asdf_fn, outputfn, filter_band, old_tag=None, new_tag=None):
    """
    Band-limit, resample and rotate the waveforms of an ASDF file.

    Every station is detrended and tapered, filtered with a frequency
    domain cosine taper (as done during a response removal, but without a
    response), interpolated onto a fixed time axis, rotated from NE to RT
    when both horizontal components are present, and cast to float32. The
    result is written to ``outputfn`` through ``ASDFDataSet.process``.

    :param asdf_fn: Filename of the ASDF file to read.
    :param outputfn: Filename of the ASDF file to write.
    :param filter_band: Sequence whose element 0 is the minimum period and
        element 1 the maximum period of the pass band.
    :param old_tag: Waveform tag to process (key of the tag map).
    :param new_tag: Tag under which the processed waveforms are stored.
    :raises ValueError: If the minimum period exceeds the maximum period.
    """
    # read in dataset
    ds = ASDFDataSet(asdf_fn)

    max_period = filter_band[1]
    min_period = filter_band[0]
    if min_period > max_period:
        raise ValueError("filter_band incorrect: min_period > max_period")

    # Corner frequencies of the frequency domain taper.
    f2 = 1.0 / max_period
    f3 = 1.0 / min_period
    f1 = 0.8 * f2
    f4 = 1.2 * f3
    pre_filt = (f1, f2, f3, f4)

    # read in event
    event = ds.events[0]
    origin = event.preferred_origin() or event.origins[0]
    event_latitude = origin.latitude
    event_longitude = origin.longitude
    event_time = origin.time

    # Figure out these parameters somehow!
    starttime = event_time
    npts = 3600
    sampling_rate = 1.0

    def process_function(st, inv):
        # Function-scope import, done once per call instead of per trace.
        from obspy.signal.util import _npts2nfft

        st.detrend("linear")
        st.detrend("demean")
        st.taper(max_percentage=0.05, type="hann")

        # Perform a frequency domain taper like during the response removal
        # just without an actual response...
        for tr in st:
            samples = tr.data.astype(np.float64)
            # The spectrum computed below is not as long as the trace, so
            # the number of input samples is stored here and used to strip
            # the zero padding after the inverse transform.
            n_samples = len(samples)

            # smart calculation of nfft dodging large primes
            nfft = _npts2nfft(n_samples)

            fy = 1.0 / (tr.stats.delta * 2.0)
            freqs = np.linspace(0, fy, nfft // 2 + 1)

            # Transform data to Frequency domain
            spec = np.fft.rfft(samples, n=nfft)
            spec *= c_sac_taper(freqs, flimit=pre_filt)
            spec[-1] = abs(spec[-1]) + 0.0j
            # transform data back into the time domain, keeping only the
            # original number of samples
            tr.data = np.fft.irfft(spec, n=nfft)[:n_samples]

        st.detrend("linear")
        st.detrend("demean")
        st.taper(max_percentage=0.05, type="hann")

        st.interpolate(sampling_rate=sampling_rate, starttime=starttime,
                       npts=npts)

        components = [tr.stats.channel[-1] for tr in st]
        if "N" in components and "E" in components:
            # Back azimuth computed from the coordinates of inv[0][0] and
            # the event origin.
            station_latitude = float(inv[0][0].latitude)
            station_longitude = float(inv[0][0].longitude)
            _, baz, _ = gps2DistAzimuth(station_latitude, station_longitude,
                                        event_latitude, event_longitude)
            st.rotate(method="NE->RT", back_azimuth=baz)

        # Convert to single precision to save space.
        for tr in st:
            tr.data = np.require(tr.data, dtype="float32")

        return st

    # NOTE(review): with the default old_tag/new_tag of None the tag map is
    # {None: None}; callers are presumably expected to pass both - confirm.
    tag_map = {old_tag: new_tag}

    # process
    ds.process(process_function, outputfn, tag_map=tag_map)