def preprocess(db, stations, comps, goal_day, params, tramef_Z,
               tramef_E=np.array([]), tramef_N=np.array([])):
    # Build, per station and per component, the list of data files available
    # for the requested day.
    datafilesZ = {}
    datafilesE = {}
    datafilesN = {}

    for station in stations:
        datafilesZ[station] = []
        datafilesE[station] = []
        datafilesN[station] = []
        net, sta = station.split('.')
        gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
        files = get_data_availability(
            db, net=net, sta=sta, starttime=gd, endtime=gd)
        for file in files:
            comp = file.comp
            fullpath = os.path.join(file.path, file.file)
            if comp[-1] == 'Z':
                datafilesZ[station].append(fullpath)
            elif comp[-1] == 'E':
                datafilesE[station].append(fullpath)
            elif comp[-1] == 'N':
                datafilesN[station].append(fullpath)

    j = 0
    for istation, station in enumerate(stations):
        for comp in comps:
            files = eval("datafiles%s['%s']" % (comp, station))
            if len(files) != 0:
                logging.debug("%s.%s Reading %i Files" %
                              (station, comp, len(files)))
                stream = Stream()
                for file in sorted(files):
                    st = read(file, dtype=np.float64,
                              starttime=UTCDateTime(gd),
                              endtime=UTCDateTime(gd) + 86400)
                    for tr in st:
                        tr.data = tr.data.astype(np.float64)
                    stream += st
                    del st

                logging.debug("Checking sample alignment")
                for i, trace in enumerate(stream):
                    stream[i] = check_and_phase_shift(trace)

                stream.sort()
                logging.debug("Checking Gaps")
                if len(getGaps(stream)) > 0:
                    # Fill gaps of up to max_gap samples by interpolation;
                    # stop once only longer gaps remain.
                    max_gap = 10
                    only_too_long = False
                    while getGaps(stream) and not only_too_long:
                        too_long = 0
                        gaps = getGaps(stream)
                        for gap in gaps:
                            if int(gap[-1]) <= max_gap:
                                stream[gap[0]] = stream[gap[0]].__add__(
                                    stream[gap[1]], method=0,
                                    fill_value="interpolate")
                                stream.remove(stream[gap[1]])
                                break
                            else:
                                too_long += 1
                        if too_long == len(gaps):
                            only_too_long = True

                # Zero out traces that are too short to taper; demean,
                # detrend and cosine-taper the others.
                taper_length = 20.0  # seconds
                for trace in stream:
                    if trace.stats.npts < 4 * taper_length * \
                            trace.stats.sampling_rate:
                        trace.data = np.zeros(trace.stats.npts)
                    else:
                        trace.detrend(type="demean")
                        trace.detrend(type="linear")
                        taper_1s = taper_length * \
                            float(trace.stats.sampling_rate) / trace.stats.npts
                        cp = cosine_taper(trace.stats.npts, taper_1s)
                        trace.data *= cp
                try:
                    stream.merge(method=0, fill_value=0.0)
                except:
                    continue

                logging.debug("%s.%s Slicing Stream to %s:%s" %
                              (station, comp,
                               utcdatetime.UTCDateTime(
                                   goal_day.replace('-', '')),
                               utcdatetime.UTCDateTime(
                                   goal_day.replace('-', '')) +
                               params.goal_duration -
                               stream[0].stats.delta))
                stream[0].trim(utcdatetime.UTCDateTime(
                                   goal_day.replace('-', '')),
                               utcdatetime.UTCDateTime(
                                   goal_day.replace('-', '')) +
                               params.goal_duration -
                               stream[0].stats.delta,
                               pad=True, fill_value=0.0,
                               nearest_sample=False)

                if get_config(db, 'remove_response', isbool=True):
                    logging.debug('Removing instrument response')
                    response_format = get_config(db, 'response_format')
                    response_prefilt = eval(get_config(db,
                                                       'response_prefilt'))
                    files = glob.glob(os.path.join(
                        get_config(db, 'response_path'), "*"))
                    if response_format == "inventory":
                        firstinv = True
                        inventory = None
                        for file in files:
                            try:
                                inv = read_inventory(file)
                                if firstinv:
                                    inventory = inv
                                    firstinv = False
                                else:
                                    inventory += inv
                            except:
                                traceback.print_exc()
                                pass
                        if inventory:
                            stream.attach_response(inventory)
                            stream.remove_response(
                                output='VEL', pre_filt=response_prefilt)
                    elif response_format == "dataless":
                        for file in files:
                            p = Parser(file)
                            try:
                                p.getPAZ(stream[0].id,
                                         datetime=UTCDateTime(gd))
                                break
                            except:
                                traceback.print_exc()
                                del p
                                continue
                        stream.simulate(
                            seedresp={'filename': p, "units": "VEL"},
                            pre_filt=response_prefilt,
                            paz_remove=None,
                            paz_simulate=None)
                    elif response_format == "paz":
                        msg = "Unexpected type for `response_format`: %s" % \
                            response_format
                        raise TypeError(msg)
                    elif response_format == "resp":
                        msg = "Unexpected type for `response_format`: %s" % \
                            response_format
                        raise TypeError(msg)
                    else:
                        msg = "Unexpected type for `response_format`: %s" % \
                            response_format
                        raise TypeError(msg)

                trace = stream[0]

                logging.debug("%s.%s Highpass at %.2f Hz" %
                              (station, comp, params.preprocess_highpass))
                trace.filter("highpass", freq=params.preprocess_highpass,
                             zerophase=True)

                if trace.stats.sampling_rate != params.goal_sampling_rate:
                    logging.debug("%s.%s Lowpass at %.2f Hz" %
                                  (station, comp, params.preprocess_lowpass))
                    trace.filter("lowpass", freq=params.preprocess_lowpass,
                                 zerophase=True, corners=8)

                    if params.resampling_method == "Resample":
                        logging.debug("%s.%s Downsample to %.1f Hz" %
                                      (station, comp,
                                       params.goal_sampling_rate))
                        trace.data = resample(
                            trace.data,
                            params.goal_sampling_rate /
                            trace.stats.sampling_rate,
                            'sinc_fastest')

                    elif params.resampling_method == "Decimate":
                        decimation_factor = trace.stats.sampling_rate / \
                            params.goal_sampling_rate
                        if not int(decimation_factor) == decimation_factor:
                            logging.warning(
                                "%s.%s CANNOT be decimated by an integer "
                                "factor, consider using Resample or Lanczos "
                                "methods. Trace sampling rate = %i ; "
                                "Desired CC sampling rate = %i" %
                                (station, comp, trace.stats.sampling_rate,
                                 params.goal_sampling_rate))
                            sys.stdout.flush()
                            sys.exit()
                        logging.debug("%s.%s Decimate by a factor of %i" %
                                      (station, comp, decimation_factor))
                        trace.data = trace.data[::int(decimation_factor)]

                    elif params.resampling_method == "Lanczos":
                        logging.debug("%s.%s Downsample to %.1f Hz" %
                                      (station, comp,
                                       params.goal_sampling_rate))
                        trace.data = np.array(trace.data)
                        trace.interpolate(
                            method="lanczos",
                            sampling_rate=params.goal_sampling_rate,
                            a=1.0)

                    trace.stats.sampling_rate = params.goal_sampling_rate

                year, month, day, hourf, minf, secf, wday, yday, isdst = \
                    trace.stats.starttime.utctimetuple()

                if j == 0:
                    t = time.strptime("%04i:%02i:%02i:%02i:%02i:%02i" %
                                      (year, month, day, hourf, minf, secf),
                                      "%Y:%m:%d:%H:%M:%S")
                    basetime = calendar.timegm(t)

                # Pad with zeros so the trace matches the pre-allocated
                # buffer length (and has an even number of samples).
                if len(trace.data) % 2 != 0:
                    trace.data = np.append(trace.data, 0.)
                if len(trace.data) != len(tramef_Z[istation]):
                    missing = len(tramef_Z[istation]) - len(trace.data)
                    for i in range(missing):
                        trace.data = np.append(trace.data, 0.)
                if comp == "Z":
                    tramef_Z[istation] = trace.data
                elif comp == "E":
                    tramef_E[istation] = trace.data
                elif comp == "N":
                    tramef_N[istation] = trace.data

                del trace, stream

    if len(tramef_E) != 0:
        return basetime, tramef_Z, tramef_E, tramef_N
    else:
        return basetime, tramef_Z
def get_paz(self, seed_id, datetime):
    """
    Get PAZ for a station at given time span. Gain is the A0 normalization
    constant for the poles and zeros.

    :type seed_id: str
    :param seed_id: SEED or channel id, e.g. ``"BW.RJOB..EHZ"`` or ``"EHE"``.
    :type datetime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param datetime: Time for which the PAZ is requested,
        e.g. ``'2010-01-01 12:00:00'``.
    :rtype: dict
    :return: Dictionary containing zeros, poles, gain and sensitivity.

    .. rubric:: Example

    >>> c = Client(timeout=2)
    >>> paz = c.station.get_paz('BW.MANZ..EHZ', '20090707')
    >>> paz['zeros']
    [0j, 0j]
    >>> len(paz['poles'])
    5
    >>> print(paz['poles'][0])
    (-0.037004+0.037016j)
    >>> paz['gain']
    60077000.0
    >>> paz['sensitivity']
    2516800000.0
    """
    # try to read PAZ from previously obtained XSEED data
    for res in self.client.xml_seeds.get(seed_id, []):
        parser = Parser(res)
        try:
            paz = parser.getPAZ(seed_id=seed_id,
                                datetime=UTCDateTime(datetime))
            return paz
        except:
            continue
    network, station, location, channel = seed_id.split(".")
    # request station information
    station_list = self.get_list(network=network, station=station,
                                 datetime=datetime)
    if not station_list:
        return {}
    # don't allow wild cards
    for wildcard in ['*', '?']:
        if wildcard in seed_id:
            msg = "Wildcards in seed_id are not allowed."
            raise ValueError(msg)

    if len(station_list) > 1:
        warnings.warn("Received more than one XSEED file. Using first.")

    xml_doc = station_list[0]
    res = self.client.station.get_resource(xml_doc['resource_name'])
    reslist = self.client.xml_seeds.setdefault(seed_id, [])
    if res not in reslist:
        reslist.append(res)
    parser = Parser(res)
    paz = parser.getPAZ(seed_id=seed_id, datetime=UTCDateTime(datetime))
    return paz
def preprocessing_function(tr, processing_info):  # NOQA
    """
    Function to perform the actual preprocessing for one individual
    seismogram. This is part of the project so it can change depending on
    the project.

    Please keep in mind that you will have to manually update this file to a
    new version if LASIF is ever updated.
    """
    def zerophase_chebychev_lowpass_filter(trace, freqmax):
        """
        Custom Chebychev type two zerophase lowpass filter useful for
        decimation filtering.

        This filter is stable up to a reduction in frequency with a factor of
        10. If more reduction is desired, simply decimate in steps.

        Partly based on a filter in ObsPy.

        :param trace: The trace to be filtered.
        :param freqmax: The desired lowpass frequency.

        Will be replaced once ObsPy has a proper decimation filter.
        """
        # rp - maximum ripple of passband, rs - attenuation of stopband
        rp, rs, order = 1, 96, 1e99
        ws = freqmax / (trace.stats.sampling_rate * 0.5)  # stop band frequency
        wp = ws  # pass band frequency

        while True:
            if order <= 12:
                break
            wp *= 0.99
            order, wn = signal.cheb2ord(wp, ws, rp, rs, analog=0)

        b, a = signal.cheby2(order, rs, wn, btype="low", analog=0,
                             output="ba")

        # Apply twice to get rid of the phase distortion.
        trace.data = signal.filtfilt(b, a, trace.data)

    # =========================================================================
    # Read seismograms and gather basic information.
    # =========================================================================
    specfem_delta_delay = -1.0687500
    starttime = processing_info["event_information"]["origin_time"] + \
        specfem_delta_delay
    endtime = starttime + processing_info["process_params"]["dt"] * \
        (processing_info["process_params"]["npts"] - 1)
    duration = endtime - starttime

    # Make sure the seismograms are long enough. If not, skip them.
    if starttime < tr.stats.starttime or endtime > tr.stats.endtime:
        msg = ("The seismogram does not cover the required time span.\n"
               "Seismogram time span: %s - %s\n"
               "Requested time span: %s - %s" % (
                   tr.stats.starttime, tr.stats.endtime, starttime, endtime))
        raise LASIFError(msg)

    # Trim to reduce processing cost.
    # starttime is the origin time of the event
    # endtime is the origin time plus the length of the synthetics
    tr.trim(starttime - 0.2 * duration, endtime + 0.2 * duration)

    # =========================================================================
    # Some basic checks on the data.
    # =========================================================================
    # Non-zero length
    if not len(tr):
        msg = "No data found in time window around the event. File skipped."
        raise LASIFError(msg)

    # No nans or infinity values allowed.
    if not np.isfinite(tr.data).all():
        msg = "Data contains NaNs or Infs. File skipped"
        raise LASIFError(msg)

    # =========================================================================
    # Step 1: Decimation
    # Decimate with the factor closest to the sampling rate of the synthetics.
    # The data is still oversampled by a large amount so there should be no
    # problems. This has to be done here so that the instrument correction is
    # reasonably fast even for input data with a large sampling rate.
    # =========================================================================
    while True:
        decimation_factor = int(processing_info["process_params"]["dt"] /
                                tr.stats.delta)
        # Decimate in steps for large sample rate reductions.
        if decimation_factor > 8:
            decimation_factor = 8
        if decimation_factor > 1:
            new_nyquist = tr.stats.sampling_rate / 2.0 / float(
                decimation_factor)
            zerophase_chebychev_lowpass_filter(tr, new_nyquist)
            tr.decimate(factor=decimation_factor, no_filter=True)
        else:
            break

    # =========================================================================
    # Step 2: Detrend and taper.
    # =========================================================================
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(max_percentage=0.05, type="hann")

    # =========================================================================
    # Step 3: Instrument correction
    # Correct seismograms to velocity in m/s.
    # =========================================================================
    output_units = "VEL"
    station_name = "station.{}_{}.response.xml".format(tr.stats.network,
                                                       tr.stats.station)
    station_file = os.path.join("StationXML", station_name)

    # check if the station file actually exists ==============================
    if not os.path.exists(station_file):
        msg = "No station file found for the relevant time span. File skipped"
        raise LASIFError(msg)

    # This is really necessary as other filters are just not sharp enough
    # and lots of energy from other frequency bands leaks into the frequency
    # band of interest
    freqmin = processing_info["process_params"]["highpass"]
    freqmax = processing_info["process_params"]["lowpass"]

    f2 = 0.9 * freqmin
    f3 = 1.1 * freqmax
    # Recommendations from the SAC manual.
    f1 = 0.5 * f2
    f4 = 2.0 * f3
    pre_filt = (f1, f2, f3, f4)

    # processing for seed files ==============================================
    if "/SEED/" in station_file:
        # XXX: Check if this is m/s. In all cases encountered so far it
        # always is, but SEED is in theory also able to specify corrections
        # to other units...
        parser = Parser(station_file)
        try:
            # The simulate might fail but might still modify the data. The
            # backup is needed for the backup plan to only correct using
            # poles and zeros.
            backup_tr = tr.copy()
            try:
                tr.simulate(seedresp={"filename": parser,
                                      "units": output_units,
                                      "date": tr.stats.starttime},
                            pre_filt=pre_filt, zero_mean=False, taper=False)
            except ValueError:
                warnings.warn("Evalresp failed, will only use the Poles and "
                              "Zeros stage")
                tr = backup_tr
                paz = parser.getPAZ(tr.id, tr.stats.starttime)
                if paz["sensitivity"] == 0:
                    warnings.warn("Sensitivity is 0 in SEED file and will "
                                  "not be taken into account!")
                    tr.simulate(paz_remove=paz, remove_sensitivity=False,
                                pre_filt=pre_filt, zero_mean=False,
                                taper=False)
                else:
                    tr.simulate(paz_remove=paz, pre_filt=pre_filt,
                                zero_mean=False, taper=False)
        except Exception:
            msg = ("File could not be corrected with the help of the "
                   "SEED file '%s'. Will be skipped.") \
                % processing_info["station_filename"]
            raise LASIFError(msg)

    # processing with RESP files =============================================
    elif "/RESP/" in station_file:
        try:
            tr.simulate(seedresp={"filename": station_file,
                                  "units": output_units,
                                  "date": tr.stats.starttime},
                        pre_filt=pre_filt, zero_mean=False, taper=False)
        except ValueError as e:
            msg = ("File could not be corrected with the help of the "
                   "RESP file '%s'. Will be skipped. Due to: %s") \
                % (processing_info["station_filename"], str(e))
            raise LASIFError(msg)
    elif "StationXML" in station_file:
        try:
            inv = obspy.read_inventory(station_file, format="stationxml")
        except Exception as e:
            msg = ("Could not open StationXML file '%s'. Due to: %s. Will be "
                   "skipped." % (station_file, str(e)))
            raise LASIFError(msg)
        tr.attach_response(inv)
        try:
            tr.remove_response(output=output_units, pre_filt=pre_filt,
                               zero_mean=False, taper=False)
        except Exception as e:
            msg = ("File could not be corrected with the help of the "
                   "StationXML file '%s'. Due to: '%s' Will be skipped.") \
                % (station_file, e.__repr__())
            raise LASIFError(msg)
    else:
        raise NotImplementedError

    # =========================================================================
    # Step 4: Bandpass filtering
    # This has to be exactly the same filter as in the source time function
    # in the case of SES3D.
    # =========================================================================
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(0.05, type="cosine")
    tr.filter("bandpass", freqmin=freqmin, freqmax=freqmax, corners=3,
              zerophase=True)
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(0.05, type="cosine")
    tr.filter("bandpass", freqmin=freqmin, freqmax=freqmax, corners=3,
              zerophase=True)

    # =========================================================================
    # Step 5: Sinc interpolation
    # =========================================================================
    # Make sure that the data array is at least as long as the
    # synthetics array.
    tr.data = np.require(tr.data, requirements="C")
    tr.interpolate(
        sampling_rate=1.0 / processing_info["process_params"]["dt"],
        method="lanczos", starttime=starttime, window="blackman", a=12,
        npts=processing_info["process_params"]["npts"])

    # =========================================================================
    # Save processed data and clean up.
    # =========================================================================
    # Convert to single precision to save some space.
    tr.data = np.require(tr.data, dtype="float32", requirements="C")
    if hasattr(tr.stats, "mseed"):
        tr.stats.mseed.encoding = "FLOAT32"

    return tr
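# A minimal usage sketch (not part of LASIF itself): the function above only
# needs an ObsPy Trace plus a processing_info dict carrying the keys it reads
# (event origin time, the dt/npts/highpass/lowpass process parameters, and a
# station_filename used in error messages). The file name and the numeric
# values below are assumptions for illustration only.
import obspy

tr = obspy.read("7D.FN01A..HHZ.mseed")[0]   # hypothetical raw seismogram
processing_info = {
    "event_information": {
        "origin_time": obspy.UTCDateTime(2011, 9, 9, 19, 41, 34, 200000)},
    "process_params": {
        "dt": 0.75, "npts": 2000,
        "highpass": 1.0 / 140.0, "lowpass": 1.0 / 80.0},
    "station_filename": "RESP.7D.FN01A..HHZ",
}
tr = preprocessing_function(tr, processing_info)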
import obspy
from obspy.signal import PPSD
from obspy.io.xseed import Parser

st = obspy.read("http://examples.obspy.org/BW.KW1..EHZ.D.2011.037")
tr = st.select(id="BW.KW1..EHZ")[0]
parser = Parser("http://examples.obspy.org/dataless.seed.BW_KW1")
paz = parser.getPAZ(tr.id)
ppsd = PPSD(tr.stats, paz)
ppsd.add(st)

st = obspy.read("http://examples.obspy.org/BW.KW1..EHZ.D.2011.038")
ppsd.add(st)

ppsd.plot()
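# Variant sketch (an assumption, not taken from the snippet above): recent
# ObsPy versions also accept the response via PPSD's `metadata` argument,
# e.g. an Inventory read from StationXML, instead of a poles-and-zeros dict.
# The StationXML file name used here is hypothetical.
import obspy
from obspy import read_inventory
from obspy.signal import PPSD

st = obspy.read("http://examples.obspy.org/BW.KW1..EHZ.D.2011.037")
tr = st.select(id="BW.KW1..EHZ")[0]
inv = read_inventory("BW.KW1.xml")      # hypothetical StationXML file
ppsd = PPSD(tr.stats, metadata=inv)
ppsd.add(st)
ppsd.plot()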
from __future__ import print_function
from math import log10

from obspy import UTCDateTime, read
from obspy.geodetics import gps2dist_azimuth
from obspy.io.xseed import Parser

st = read("../data/LKBD.MSEED")

paz_wa = {'sensitivity': 2800, 'zeros': [0j], 'gain': 1,
          'poles': [-6.2832 - 4.7124j, -6.2832 + 4.7124j]}

parser = Parser("../data/LKBD.dataless")
paz_le3d5s = parser.getPAZ("CH.LKBD..EHZ")

st.simulate(paz_remove=paz_le3d5s, paz_simulate=paz_wa, water_level=10)

t = UTCDateTime("2012-04-03T02:45:03")
st.trim(t, t + 50)

tr_n = st.select(component="N")[0]
ampl_n = max(abs(tr_n.data))
tr_e = st.select(component="E")[0]
ampl_e = max(abs(tr_e.data))
ampl = max(ampl_n, ampl_e)

sta_lat = 46.38703
sta_lon = 7.62714
event_lat = 46.218
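# The snippet above is truncated after `event_lat`. A hedged sketch of how
# such a local-magnitude example typically continues: the event longitude is
# a placeholder, and the attenuation relation is the Bakun & Joyner (1984)
# style formula commonly used with Wood-Anderson amplitudes.
event_lon = 7.706   # hypothetical epicentre longitude

epi_dist, az, baz = gps2dist_azimuth(event_lat, event_lon, sta_lat, sta_lon)
epi_dist = epi_dist / 1000.0            # metres -> kilometres

# ampl is the peak Wood-Anderson displacement in metres; convert to mm.
ml = log10(ampl * 1000) + 1.11 * log10(epi_dist) + 0.00189 * epi_dist - 2.09
print(ml)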
def preprocessing_function(processing_info, iteration):  # NOQA
    """
    Function to perform the actual preprocessing for one individual
    seismogram. This is part of the project so it can change depending on
    the project.

    Please keep in mind that you will have to manually update this file to a
    new version if LASIF is ever updated.

    You can do whatever you want in this function as long as the function
    signature is honored. The file is read from ``"input_filename"`` and
    written to ``"output_filename"``.

    One goal of this function is to make sure that the data is available at
    the same time steps as the synthetics. The first time sample of the
    synthetics will always be the origin time of the event.

    Furthermore the data has to be converted to m/s.

    :param processing_info: A dictionary containing information about the
        file to be processed. It will have the following structure.
    :type processing_info: dict

    .. code-block:: python

        {'event_information': {
            'depth_in_km': 22.0,
            'event_name': 'GCMT_event_VANCOUVER_ISLAND...',
            'filename': '/.../GCMT_event_VANCOUVER_ISLAND....xml',
            'latitude': 49.53,
            'longitude': -126.89,
            'm_pp': 2.22e+18,
            'm_rp': -2.78e+18,
            'm_rr': -6.15e+17,
            'm_rt': 1.98e+17,
            'm_tp': 5.14e+18,
            'm_tt': -1.61e+18,
            'magnitude': 6.5,
            'magnitude_type': 'Mwc',
            'origin_time': UTCDateTime(2011, 9, 9, 19, 41, 34, 200000),
            'region': u'VANCOUVER ISLAND, CANADA REGION'},
         'input_filename': u'/.../raw/7D.FN01A..HHZ.mseed',
         'output_filename': u'/.../processed_.../7D.FN01A..HHZ.mseed',
         'process_params': {
            'dt': 0.75,
            'highpass': 0.007142857142857143,
            'lowpass': 0.0125,
            'npts': 2000},
         'station_coordinates': {
            'elevation_in_m': -54.0,
            'latitude': 46.882,
            'local_depth_in_m': None,
            'longitude': -124.3337},
         'station_filename': u'/.../STATIONS/RESP/RESP.7D.FN01A..HH*'}

    Please note that you also got the iteration object here, so if you want
    some parameters to change depending on the iteration, just use if/else on
    the iteration objects.

    >>> iteration.name  # doctest: +SKIP
    '11'
    >>> iteration.get_process_params()  # doctest: +SKIP
    {'dt': 0.75,
     'highpass': 0.01,
     'lowpass': 0.02,
     'npts': 500}

    Use ``$ lasif shell`` to play around and figure out what the iteration
    objects can do.
    """
    def zerophase_chebychev_lowpass_filter(trace, freqmax):
        """
        Custom Chebychev type two zerophase lowpass filter useful for
        decimation filtering.

        This filter is stable up to a reduction in frequency with a factor of
        10. If more reduction is desired, simply decimate in steps.

        Partly based on a filter in ObsPy.

        :param trace: The trace to be filtered.
        :param freqmax: The desired lowpass frequency.

        Will be replaced once ObsPy has a proper decimation filter.
        """
        # rp - maximum ripple of passband, rs - attenuation of stopband
        rp, rs, order = 1, 96, 1e99
        ws = freqmax / (trace.stats.sampling_rate * 0.5)  # stop band frequency
        wp = ws  # pass band frequency

        while True:
            if order <= 12:
                break
            wp *= 0.99
            order, wn = signal.cheb2ord(wp, ws, rp, rs, analog=0)

        b, a = signal.cheby2(order, rs, wn, btype="low", analog=0,
                             output="ba")

        # Apply twice to get rid of the phase distortion.
        trace.data = signal.filtfilt(b, a, trace.data)

    # =========================================================================
    # Read seismograms and gather basic information.
    # =========================================================================
    starttime = processing_info["event_information"]["origin_time"]
    endtime = starttime + processing_info["process_params"]["dt"] * \
        (processing_info["process_params"]["npts"] - 1)
    duration = endtime - starttime

    st = obspy.read(processing_info["input_filename"])

    if len(st) != 1:
        warnings.warn("The file '%s' has %i traces and not 1. "
                      "Skip all but the first" % (
                          processing_info["input_filename"], len(st)))
    tr = st[0]

    # Make sure the seismograms are long enough. If not, skip them.
    if starttime < tr.stats.starttime or endtime > tr.stats.endtime:
        msg = ("The seismogram does not cover the required time span.\n"
               "Seismogram time span: %s - %s\n"
               "Requested time span: %s - %s" % (
                   tr.stats.starttime, tr.stats.endtime, starttime, endtime))
        raise LASIFError(msg)

    # Trim to reduce processing cost.
    # starttime is the origin time of the event
    # endtime is the origin time plus the length of the synthetics
    tr.trim(starttime - 0.2 * duration, endtime + 0.2 * duration)

    # =========================================================================
    # Some basic checks on the data.
    # =========================================================================
    # Non-zero length
    if not len(tr):
        msg = "No data found in time window around the event. File skipped."
        raise LASIFError(msg)

    # No nans or infinity values allowed.
    if not np.isfinite(tr.data).all():
        msg = "Data contains NaNs or Infs. File skipped"
        raise LASIFError(msg)

    # =========================================================================
    # Step 1: Decimation
    # Decimate with the factor closest to the sampling rate of the synthetics.
    # The data is still oversampled by a large amount so there should be no
    # problems. This has to be done here so that the instrument correction is
    # reasonably fast even for input data with a large sampling rate.
    # =========================================================================
    while True:
        decimation_factor = int(processing_info["process_params"]["dt"] /
                                tr.stats.delta)
        # Decimate in steps for large sample rate reductions.
        if decimation_factor > 8:
            decimation_factor = 8
        if decimation_factor > 1:
            new_nyquist = tr.stats.sampling_rate / 2.0 / float(
                decimation_factor)
            zerophase_chebychev_lowpass_filter(tr, new_nyquist)
            tr.decimate(factor=decimation_factor, no_filter=True)
        else:
            break

    # =========================================================================
    # Step 2: Detrend and taper.
    # =========================================================================
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(max_percentage=0.05, type="hann")

    # =========================================================================
    # Step 3: Instrument correction
    # Correct seismograms to velocity in m/s.
    # =========================================================================
    output_units = "VEL"
    station_file = processing_info["station_filename"]

    # check if the station file actually exists ==============================
    if not processing_info["station_filename"]:
        msg = "No station file found for the relevant time span. File skipped"
        raise LASIFError(msg)

    # This is really necessary as other filters are just not sharp enough
    # and lots of energy from other frequency bands leaks into the frequency
    # band of interest
    freqmin = processing_info["process_params"]["highpass"]
    freqmax = processing_info["process_params"]["lowpass"]

    f2 = 0.9 * freqmin
    f3 = 1.1 * freqmax
    # Recommendations from the SAC manual.
    f1 = 0.5 * f2
    f4 = 2.0 * f3
    pre_filt = (f1, f2, f3, f4)

    # processing for seed files ==============================================
    if "/SEED/" in station_file:
        # XXX: Check if this is m/s. In all cases encountered so far it
        # always is, but SEED is in theory also able to specify corrections
        # to other units...
        parser = Parser(station_file)
        try:
            # The simulate might fail but might still modify the data. The
            # backup is needed for the backup plan to only correct using
            # poles and zeros.
            backup_tr = tr.copy()
            try:
                tr.simulate(seedresp={"filename": parser,
                                      "units": output_units,
                                      "date": tr.stats.starttime},
                            pre_filt=pre_filt, zero_mean=False, taper=False)
            except ValueError:
                warnings.warn("Evalresp failed, will only use the Poles and "
                              "Zeros stage")
                tr = backup_tr
                paz = parser.getPAZ(tr.id, tr.stats.starttime)
                if paz["sensitivity"] == 0:
                    warnings.warn("Sensitivity is 0 in SEED file and will "
                                  "not be taken into account!")
                    tr.simulate(paz_remove=paz, remove_sensitivity=False,
                                pre_filt=pre_filt, zero_mean=False,
                                taper=False)
                else:
                    tr.simulate(paz_remove=paz, pre_filt=pre_filt,
                                zero_mean=False, taper=False)
        except Exception:
            msg = ("File could not be corrected with the help of the "
                   "SEED file '%s'. Will be skipped.") \
                % processing_info["station_filename"]
            raise LASIFError(msg)

    # processing with RESP files =============================================
    elif "/RESP/" in station_file:
        try:
            tr.simulate(seedresp={"filename": station_file,
                                  "units": output_units,
                                  "date": tr.stats.starttime},
                        pre_filt=pre_filt, zero_mean=False, taper=False)
        except ValueError as e:
            msg = ("File could not be corrected with the help of the "
                   "RESP file '%s'. Will be skipped. Due to: %s") \
                % (processing_info["station_filename"], str(e))
            raise LASIFError(msg)
    elif "/StationXML/" in station_file:
        try:
            inv = obspy.read_inventory(station_file, format="stationxml")
        except Exception as e:
            msg = ("Could not open StationXML file '%s'. Due to: %s. Will be "
                   "skipped." % (station_file, str(e)))
            raise LASIFError(msg)
        tr.attach_response(inv)
        try:
            tr.remove_response(output=output_units, pre_filt=pre_filt,
                               zero_mean=False, taper=False)
        except Exception as e:
            msg = ("File could not be corrected with the help of the "
                   "StationXML file '%s'. Due to: '%s' Will be skipped.") \
                % (processing_info["station_filename"], e.__repr__())
            raise LASIFError(msg)
    else:
        raise NotImplementedError

    # =========================================================================
    # Step 4: Bandpass filtering
    # This has to be exactly the same filter as in the source time function
    # in the case of SES3D.
    # =========================================================================
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(0.05, type="cosine")
    tr.filter("bandpass", freqmin=freqmin, freqmax=freqmax, corners=3,
              zerophase=False)
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(0.05, type="cosine")
    tr.filter("bandpass", freqmin=freqmin, freqmax=freqmax, corners=3,
              zerophase=False)

    # =========================================================================
    # Step 5: Sinc interpolation
    # =========================================================================
    # Make sure that the data array is at least as long as the
    # synthetics array.
    tr.interpolate(
        sampling_rate=1.0 / processing_info["process_params"]["dt"],
        method="lanczos", starttime=starttime, window="blackman", a=12,
        npts=processing_info["process_params"]["npts"])

    # =========================================================================
    # Save processed data and clean up.
    # =========================================================================
    # Convert to single precision to save some space.
    tr.data = np.require(tr.data, dtype="float32", requirements="C")
    if hasattr(tr.stats, "mseed"):
        tr.stats.mseed.encoding = "FLOAT32"

    tr.write(processing_info["output_filename"], format=tr.stats._format)
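# A minimal usage sketch (not part of LASIF): the docstring above documents
# the processing_info structure, so a call only needs such a dict plus the
# iteration object LASIF normally passes in. All file names, parameter
# values and the `iteration` placeholder below are assumptions made purely
# for illustration.
import obspy

processing_info = {
    "event_information": {
        "origin_time": obspy.UTCDateTime(2011, 9, 9, 19, 41, 34, 200000)},
    "process_params": {"dt": 0.75, "npts": 2000,
                       "highpass": 0.007142857142857143, "lowpass": 0.0125},
    "input_filename": "raw/7D.FN01A..HHZ.mseed",
    "output_filename": "processed/7D.FN01A..HHZ.mseed",
    "station_filename": "STATIONS/RESP/RESP.7D.FN01A..HHZ",
}
iteration = None   # placeholder; LASIF supplies its own iteration object

preprocessing_function(processing_info, iteration)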