def preprocess(db, stations, comps, goal_day, params, responses=None): """ Fetches data for each ``stations`` and each ``comps`` using the data_availability table in the database. To correct for instrument responses, make sure to set ``remove_response`` to "Y" in the config and to provide the ``responses`` DataFrame. :Example: >>> from msnoise.api import connect, get_params, preload_instrument_responses >>> from msnoise.preprocessing import preprocess >>> db = connect() >>> params = get_params(db) >>> responses = preload_instrument_responses(db) >>> st = preprocess(db, ["YA.UV06","YA.UV10"], ["Z",], "2010-09-01", params, responses) >>> st 2 Trace(s) in Stream: YA.UV06.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples YA.UV10.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples :type db: :class:`sqlalchemy.orm.session.Session` :param db: A :class:`~sqlalchemy.orm.session.Session` object, as obtained by :func:`msnoise.api.connect`. :type stations: list of str :param stations: a list of station names, in the format NET.STA. :type comps: list of str :param comps: a list of component names, in Z,N,E,1,2. :type goal_day: str :param goal_day: the day of data to load, ISO 8601 format: e.g. 2016-12-31. :type params: class :param params: an object containing the config parameters, as obtained by :func:`msnoise.api.get_params`. :type responses: :class:`pandas.DataFrame` :param responses: a DataFrame containing the instrument responses, as obtained by :func:`msnoise.api.preload_instrument_responses`. :rtype: :class:`obspy.core.stream.Stream` :return: A Stream object containing all traces. """ datafiles = {} output = Stream() MULTIPLEX = False MULTIPLEX_files = {} for station in stations: datafiles[station] = {} net, sta, loc = station.split('.') gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d') files = get_data_availability(db, net=net, sta=sta, loc=loc, starttime=gd, endtime=gd) for comp in comps: datafiles[station][comp] = [] for file in files: if file.sta != "MULTIPLEX": if file.chan[-1] not in comps: continue fullpath = os.path.join(file.path, file.file) datafiles[station][file.chan[-1]].append(fullpath) else: MULTIPLEX = True print("Mutliplex mode, reading the files") fullpath = os.path.join(file.path, file.file) multiplexed = sorted(glob.glob(fullpath)) for comp in comps: for fn in multiplexed: if fn in MULTIPLEX_files: _ = MULTIPLEX_files[fn] else: # print("Reading %s" % fn) _ = read(fn, format=params.archive_format or None) traces = [] for tr in _: if "%s.%s" % ( tr.stats.network, tr.stats.station ) in stations and tr.stats.channel[-1] in comps: traces.append(tr) del _ _ = Stream(traces=traces) MULTIPLEX_files[fn] = _ datafiles[station][comp].append(_) for istation, station in enumerate(stations): net, sta, loc = station.split(".") for comp in comps: files = eval("datafiles['%s']['%s']" % (station, comp)) if len(files) != 0: logger.debug("%s.%s Reading %i Files" % (station, comp, len(files))) traces = [] for file in files: if isinstance(file, Stream): st = file.select(network=net, station=sta, component=comp).copy() else: try: # print("Reading %s" % file) # t= time.time() st = read(file, dytpe=np.float, starttime=UTCDateTime(gd), endtime=UTCDateTime(gd) + 86400, station=sta, format=params.archive_format or None) # print("done in", time.time()-t) except: logger.debug("ERROR reading file %s" % file) # TODO add traceback (optional?) continue for tr in st: if len(tr.stats.channel) == 2: tr.stats.channel += tr.stats.location tr.stats.location = "00" tmp = st.select(network=net, station=sta, component=comp) if not len(tmp): for tr in st: tr.stats.network = net st = st.select(network=net, station=sta, component=comp) else: st = tmp for tr in st: tr.data = tr.data.astype(np.float) tr.stats.network = tr.stats.network.upper() tr.stats.station = tr.stats.station.upper() tr.stats.channel = tr.stats.channel.upper() if tr.stats.location == "": tr.stats.location = "--" traces.append(tr) del st stream = Stream(traces=traces) if not (len(stream)): continue f = io.BytesIO() stream.write(f, format='MSEED') f.seek(0) stream = read(f, format="MSEED") stream.sort() # try: # # HACK not super clean... should find a way to prevent the # # same trace id with different sps to occur # stream.merge(method=1, interpolation_samples=3, fill_value=None) # except: # logger.debug("Error while merging...") # traceback.print_exc() # continue # stream = stream.split() if not len(stream): continue logger.debug("%s Checking sample alignment" % stream[0].id) for i, trace in enumerate(stream): stream[i] = check_and_phase_shift( trace, params.preprocess_taper_length) logger.debug("%s Checking Gaps" % stream[0].id) if len(getGaps(stream)) > 0: max_gap = params.preprocess_max_gap * stream[ 0].stats.sampling_rate gaps = getGaps(stream) while len(gaps): too_long = 0 for gap in gaps: if int(gap[-1]) <= max_gap: try: stream[gap[0]] = stream[gap[0]].__add__( stream[gap[1]], method=1, fill_value="interpolate") stream.remove(stream[gap[1]]) except: stream.remove(stream[gap[1]]) break else: too_long += 1 if too_long == len(gaps): break gaps = getGaps(stream) del gaps stream = stream.split() for tr in stream: if tr.stats.sampling_rate < (params.goal_sampling_rate - 1): stream.remove(tr) taper_length = params.preprocess_taper_length # seconds for trace in stream: if trace.stats.npts < (4 * taper_length * trace.stats.sampling_rate): stream.remove(trace) else: trace.detrend(type="demean") trace.detrend(type="linear") trace.taper(max_percentage=None, max_length=taper_length) if not len(stream): logger.debug(" has only too small traces, skipping...") continue for trace in stream: logger.debug("%s Highpass at %.2f Hz" % (trace.id, params.preprocess_highpass)) trace.filter("highpass", freq=params.preprocess_highpass, zerophase=True, corners=4) if trace.stats.sampling_rate != params.goal_sampling_rate: logger.debug("%s Lowpass at %.2f Hz" % (trace.id, params.preprocess_lowpass)) trace.filter("lowpass", freq=params.preprocess_lowpass, zerophase=True, corners=8) if params.resampling_method == "Resample": logger.debug("%s Downsample to %.1f Hz" % (trace.id, params.goal_sampling_rate)) trace.data = resample( trace.data, params.goal_sampling_rate / trace.stats.sampling_rate, 'sinc_fastest') elif params.resampling_method == "Decimate": decimation_factor = trace.stats.sampling_rate / params.goal_sampling_rate if not int(decimation_factor) == decimation_factor: logger.warning( "%s CANNOT be decimated by an integer factor, consider using Resample or Lanczos methods" " Trace sampling rate = %i ; Desired CC sampling rate = %i" % (trace.id, trace.stats.sampling_rate, params.goal_sampling_rate)) sys.stdout.flush() sys.exit() logger.debug("%s Decimate by a factor of %i" % (trace.id, decimation_factor)) trace.data = trace.data[::int(decimation_factor)] elif params.resampling_method == "Lanczos": logger.debug("%s Downsample to %.1f Hz" % (trace.id, params.goal_sampling_rate)) trace.data = np.array(trace.data) trace.interpolate( method="lanczos", sampling_rate=params.goal_sampling_rate, a=1.0) trace.stats.sampling_rate = params.goal_sampling_rate del trace if params.remove_response: logger.debug('%s Removing instrument response' % stream[0].id) try: stream.attach_response(responses) stream.remove_response( pre_filt=params.response_prefilt, taper=False) except: logger.error("Bad or no instrument response " "information for %s, skipping" % stream[0].id) continue for tr in stream: tr.data = tr.data.astype(np.float32) output += stream del stream del files clean_scipy_cache() del MULTIPLEX_files return output
stime = UTCDateTime('2019-08-16 12:59:10') etime = stime + 120 client = Client() inv = client.get_stations(network=net, station=sta, starttime=stime, endtime=etime, channel=chan, level="response") st = Stream() st += client.get_waveforms(net, sta, loc, chan, stime, etime) st.detrend('constant') st.merge(fill_value=0) st.attach_response(inv) st.remove_response(output="DISP") #st.rotate(method="->ZNE",inventory=inv) st.filter("bandpass", freqmin=.5, freqmax=5) tr = st[0] t = np.linspace(0, (tr.stats.npts - 1) / tr.stats.sampling_rate, num=tr.stats.npts) fig = plt.figure(1, figsize=(12, 12)) plt.ylabel('Displacement (mm)', fontsize=14) plt.xlim([0, 120]) plt.ylim([-.02, .02]) plt.xlabel('seconds after origin', fontsize=14) plt.title('%s-%s-%s-%s, 2019-08-16, Hutchinson, KS Earthquake' % (net, sta, loc, chan), fontsize=14)