def spectralWhitening(data, sr=None, smoothi=None, freq_domain=False,
                      apply_filter=None):
    """
    Apply spectral whitening to data.

    The spectrum is divided by its amplitude spectrum, or by a smoothed
    version of it when a positive smoothing width is given.

    :param data: time series, or the spectrum itself if ``freq_domain`` is
        True. In the latter case the array is modified in place.
    :param sr: sampling rate (only needed for smoothing and for
        ``apply_filter``)
    :param smoothi: None, a number or a numeric string. If positive, the
        amplitude spectrum is smoothed over ``int(smoothi * N / sr)`` samples
        (N = ``len(data)``) before the division. Anything else (None, 0,
        negative, non-numeric) selects plain whitening.
    :param freq_domain: if True ``data`` is already a spectrum and a spectrum
        is returned; otherwise the data is transformed with an FFT of length
        ``nextpow2(len(data))`` and transformed back afterwards.
    :param apply_filter: None or a sequence of positional arguments that is
        forwarded to ``filterResp``; the second element of its result is
        multiplied onto the whitened spectrum.
    :return: whitened spectrum (``freq_domain=True``) or the real part of the
        whitened time series cut to the original length and passed through
        ``fillArray`` with the input mask and ``fill_value=0.``.
    :raises ValueError: if smoothing is requested without a sampling rate.

    NOTE: frequencies with zero amplitude are divided by zero; no water level
    is applied here.
    """
    # Input length; defined for both domains because the smoothing width
    # below is derived from it.
    N = len(data)
    if freq_domain:
        spec = data
    else:
        mask = np.ma.getmask(data)
        nfft = nextpow2(N)
        spec = fft(data, nfft)
    spec_ampl = np.sqrt(np.abs(np.multiply(spec, np.conjugate(spec))))

    # Accept numbers as well as numeric strings (e.g. read from a config
    # file); everything that cannot be parsed disables smoothing.
    try:
        width = float(smoothi)
    except (TypeError, ValueError):
        width = None

    if width is not None and width > 0:
        if not sr:
            raise ValueError('spectralWhitening() needs the sampling rate '
                             'sr if smoothi is given.')
        num_samples = int(width * N / sr)
        spec /= ifftshift(smooth(fftshift(spec_ampl), num_samples))
    else:
        spec /= spec_ampl

    if apply_filter is not None:
        spec *= filterResp(*apply_filter, sr=sr, N=len(spec), whole=True)[1]

    if freq_domain:
        return spec
    ret = np.real(ifft(spec, nfft)[:N])
    if USE_FFTW3:
        ret = ret.copy()
    return fillArray(ret, mask=mask, fill_value=0.)
def timeNorm(data, method=None, param=None, recursive=0):
    """
    Calculates normalized data. See Bensen et al.(2007)

    Except for '1bit' (which builds a new array) the methods modify ``data``
    in place. The result is passed through ``fillArray`` together with the
    mask of the input and ``fill_value=0.``.

    Method is a string. There are the following methods:

    1bit: reduce data to +1 if >0 and -1 if <0
    clip: clip data to +/- its standard deviation
    eventremoval: automatic event detection and removal - if a value is
        bigger than the threshold, the following values are set to zero.
        param: (threshold, number of samples (eg. 30min) to set to zero)
        default: (2000, 30 * 60 * 10)
    stalta: automatic event removing with recursive sta/lta trigger
        param: (sta samples, lta samples, trigger on, trigger off)
        default: (100 * 3, 100 * 10, 1.2, 1.0)
    runningmean: the data is normalized with the running average
        The width of the normalization window determines how much amplitude
        information is retained (N=1 -> 1bit normalization, N very big ->
        rescaled data). Half of the maximum period of the passband filter_
        works well.
        param: width of window (should be odd), default: 10 * 10
    runningmean_over_filtered: the data is normalized with the running
        average over the filtered data.
        A band pass filter_ between 20s and 100s period can remove local
        seismicity.
        param: (width of window in seconds, sampling rate, filter_, freq1,
            freq2)
            filter_: in ('band', 'low', 'high')
            if filter_ in ('low', 'high') only one frequency is needed
        default: (10, 10, 'band', 1 / 50., 1 / 15.)
    waterlevel: any amplitude above the waterlevel (multiple of the standard
        deviation) is down-weighted by a factor (or set to zero if the
        factor is 0). This procedure is repeated until all of the waveform
        data is under the water-level.
        param: (water-level factor, reducing factor), default: (6., 10.)
    waterlevel_rm: like waterlevel, but a sample is also reduced if the
        running mean of the absolute data exceeds the waterlevel, which is
        computed from the standard deviation of that running mean. The
        water-level factor is increased after every pass.
        param: (width of running mean window, water-level factor,
            reducing factor), default: (5 * 10, 4., 10.)
    waterlevel_env: like waterlevel, but the envelope of the data is compared
        with the waterlevel. Stops when less than 0.05% of the samples are
        affected or after 20 passes.
        param: (water-level factor, reducing factor), default: (4., 10.)
            Two more entries (accumulated fraction of reduced samples, pass
            counter) are appended internally for the recursion.
    waterlevel_env2: single pass; the waterlevel is estimated from the
        standard deviation of the envelope in short windows spread over the
        data. With a reducing factor of 0 the samples are masked instead of
        being set to zero.
        param: (water-level factor, reducing factor), default: (4., 10.)

    :param data: array or masked array
    :param method: one of the strings above or None (no normalization)
    :param param: parameters of the method (see above), None for defaults
    :param recursive: recursion depth, only used by the method itself
    :return: result of ``fillArray(data, mask=mask, fill_value=0.)``
    :raises ValueError: for an unknown method or filter type
    """
    mask = np.ma.getmask(data)
    if method == '1bit':
        data = np.sign(data)
    elif method == 'clip':
        std = np.std(data)
        data[data > std] = std
        data[data < -std] = -std
    elif method == 'eventremoval':
        if param is None:
            # remove 30 min (at 10Hz) after events if data is bigger than 2000
            param = (2000, 30 * 60 * 10)
        clip = np.nonzero(abs(data) >= param[0])[0]
        if len(clip) > 0:
            clip = clip[0]
            index = min(clip + param[1], len(data))
            data[clip:index] = 0
            if index < len(data):
                # look for further events behind the zeroed window
                data[index:] = timeNorm(data[index:], method=method,
                                        param=param)
    elif method == 'stalta':
        if param is None:
            # STA: 3s at 100Hz, LTA: 10s, trigger on: 1.2, trigger off:1.0
            param = (100 * 3, 100 * 10, 1.2, 1.0)
        cft = obspy.signal.trigger.recSTALTA(data, param[0], param[1])
        trg = obspy.signal.trigger.triggerOnset(cft, param[2], param[3])
        for on, off in trg:
            data[on:off] = 0
    elif method == 'runningmean':
        if param is None:
            # default window: 10 * 10 samples
            param = 10 * 10
        smoothed = smooth(np.abs(data), param)
        data /= smoothed
    elif method == 'runningmean_over_filtered':
        if param is None:
            # smooth over 10s at 10Hz over bandpassed data
            param = (10, 10, 'band', 1 / 50., 1 / 15.)
        sr = param[1]
        over = int(param[0] * sr)
        filter_ = param[2]
        if filter_ == 'band':
            data2 = obspy.signal.bandpass(data, param[3], param[4], sr)
        elif filter_ == 'low':
            data2 = obspy.signal.lowpass(data, param[3], sr)
        elif filter_ == 'high':
            data2 = obspy.signal.highpass(data, param[3], sr)
        else:
            raise ValueError("filter_ should be in ('band', 'high', 'low')")
        data /= smooth(np.abs(data2), over)
    elif method == 'waterlevel':
        if param is None:
            # data above 6*rms is recursively reduced by a factor of 10
            param = (6., 10.)
        waterlevel = param[0] * np.std(data)
        indices = np.abs(data) > waterlevel
        if np.any(indices):
            if param[1] == 0:
                data[indices] = 0
            else:
                data[indices] /= param[1]
            data = timeNorm(data, method=method, param=param,
                            recursive=recursive + 1)
    elif method == 'waterlevel_rm':
        if param is None:
            # running mean over 5s at 10Hz data
            # data above 4*rms is recursively reduced by a factor of 10
            param = (5 * 10, 4., 10.)
        running_mean = smooth(np.abs(data), param[0])
        waterlevel = param[1] * np.std(running_mean)
        # '+' on boolean arrays is a logical or
        indices = (running_mean > waterlevel) + (np.abs(data) > waterlevel)
        if np.any(indices):
            param = list(param)
            frac_zeros = 1. * np.count_nonzero(indices) / len(data)
            if param[2] == 0:
                data[indices] = 0
                param[1] *= (1 + frac_zeros)
            else:
                data[indices] /= param[2]
                # 1. / ... : avoid integer division for integer factors
                param[1] *= (1 + frac_zeros * (1 - 1. / param[2]))
            # progress output; same text as "print a, b, c" but also valid
            # syntax for the print function
            print('%s %s %s' % (recursive, frac_zeros, waterlevel))
            data = timeNorm(data, method=method, param=param,
                            recursive=recursive + 1)
    elif method == 'waterlevel_env':
        if param is None:
            # data above 4*rms is recursively reduced by a factor of 10
            param = (4., 10.)
        param = list(param)
        if len(param) == 2:
            # recursion state: param[2] = accumulated fraction of reduced
            # samples, param[3] = number of passes done so far
            param.append(0)
            param.append(0)
        env = obspy.signal.cpxtrace.envelope(data)[1][:len(data)]
        # correct std because of zeros
        waterlevel = param[0] * np.std(env) / (1 - param[2])
        indices = env > waterlevel
        frac_zeros = 1. * np.count_nonzero(indices) / len(data)
        if np.any(indices) and frac_zeros > 0.0005 and param[3] < 20:
            if param[1] == 0:
                data[indices] = 0
            else:
                # divide by the reducing factor param[1]; param[2] is the
                # accumulated fraction (0 in the first pass)
                data[indices] /= param[1]
            print('%s %s %s %s' % (param[3], frac_zeros, param[2],
                                   waterlevel))
            param[2] += frac_zeros
            param[3] += 1
            data = timeNorm(data, method=method, param=param)
    elif method == 'waterlevel_env2':
        if param is None:
            # data above 4*rms is reduced by a factor of 10
            param = (4., 10.)
        N = len(data)
        env = obspy.signal.cpxtrace.envelope(data)[1][:N]
        # np.ma.getmask() returns np.ma.nomask (never the Python object
        # False) for unmasked data
        if mask is not np.ma.nomask:
            env[mask] = 0.
        num_stds = 96  # 24*4 =^ every 15min
        if N < 86400:  # 24*3600
            num_stds = N // 900
        # NOTE: for N < 900 num_stds is 0 and the next line raises
        # ZeroDivisionError
        len_parts = N // num_stds  # N//96 = N//24//4 =^ 15min
        len_stds = len_parts // 15  # len_parts//15 =^ 1min
        stds = np.array([np.std(env[i:i + len_stds])
                         for i in np.arange(num_stds) * len_parts])
        if np.min(stds) == 0:
            stds = stds[stds != 0.]
            num_stds = len(stds)
        # drop the smallest and largest values; (-num_stds) // 15 rounds
        # towards -inf, so up to one more value is cut at the upper end
        stds = np.sort(stds)[num_stds // 15:-num_stds // 15]
        stds = stds[stds < np.min(stds) * 2.]
        waterlevel = param[0] * np.mean(stds)
        indices = env > waterlevel
        if np.any(indices):
            if param[1] == 0:
                # not setting values to zero but masking them
                # -> they will stay zero after spectral whitening
                # and 1bit normalization
                mask = np.ma.mask_or(mask, indices)
            else:
                # param has only two entries here: the reducing factor is
                # param[1]
                data[indices] /= param[1]
    elif method is not None:
        raise ValueError('The method passed to timeNorm() is not known.')
    return fillArray(data, mask=mask, fill_value=0.)
def plotPSD(self, ax=None, x_time=True, scale_by_freq=True,
            Nfft=256 * 16 * 16, pad_to=None, xscale='log', yscale='log',
            grid=True, xlabel='time (s)', ylabel=None,
            figtitle='PSD station component date', title_in_axis=False,
            smooth=True, just_calculate=False, **kwargs):
    """
    Plot the power spectral density (PSD) of the trace.

    If ``self.stats.is_fft`` is set, ``self.data`` is used as the PSD as it
    is; otherwise the PSD is calculated with ``psd`` (see
    matplotlib.mlab.psd for ``Nfft``, ``scale_by_freq`` and ``pad_to``).

    :param ax: axes to plot into; a new figure is created if None
    :param x_time: if True the PSD is plotted against 1/frequency,
        otherwise against frequency
    :param scale_by_freq, Nfft, pad_to: passed to ``psd`` (``Nfft`` as
        ``NFFT``)
    :param xscale, yscale: scales of the axes
    :param grid: passed to ``ax.grid``
    :param xlabel, ylabel: axis labels, None for no label. If ``x_time`` is
        False and xlabel contains 'time' it is replaced by 'freq (Hz)'.
    :param figtitle: title or None. The words 'station', 'component',
        'time', 'date', 'year' and 'nfft' are replaced by the corresponding
        values where these are available.
    :param title_in_axis: write the title into the axes instead of using
        the figure title
    :param smooth: if it evaluates to True, the PSD is smoothed with
        ``util.smooth(pxx, smooth)``
    :param just_calculate: if True nothing is plotted and the tuple
        (pxx, freqs) is returned
    :param kwargs: passed to ``ax.plot``
    :return: the axes, or (pxx, freqs) if ``just_calculate`` is True
    """
    if self.stats.is_fft:
        pxx = self.data
        if 'freq_min' in self.stats:
            freqs = np.linspace(self.stats.freq_min, self.stats.freq_max,
                                self.stats.npts)
        else:
            freqs = self.fftfreq()
    else:
        pxx, freqs = psd(self.data, NFFT=Nfft, Fs=self.stats.sampling_rate,
                         scale_by_freq=scale_by_freq, pad_to=pad_to)
    if just_calculate:
        return pxx, freqs

    if x_time:
        # reverse both arrays so that the new x values (1/freq) are in the
        # reversed order of the frequencies
        pxx = pxx[::-1]
        freqs = 1. / freqs[::-1]
    elif xlabel is not None and 'time' in xlabel:
        # xlabel=None is a valid value (no label), so guard the test
        xlabel = 'freq (Hz)'
    if smooth:
        pxx = util.smooth(pxx, smooth)

    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
    else:
        fig = ax.get_figure()

    # ## print title
    if figtitle is not None:
        figtitle = figtitle.replace('station', self.stats.station)
        # [-1:] instead of [-1]: an empty channel code gives an empty
        # component instead of an IndexError
        figtitle = figtitle.replace('component', self.stats.channel[-1:])
        try:
            starttime = self.stats.starttime + 0.5
            figtitle = figtitle.replace('time', '%s' % starttime)
            figtitle = figtitle.replace('date', '%s' % starttime.date)
            figtitle = figtitle.replace('year', '%d' % starttime.year)
            figtitle = figtitle.replace('nfft', '%d' % Nfft)
        except Exception:
            # The remaining replacements are best effort: keep the title as
            # far as it was filled in, but do not swallow KeyboardInterrupt
            # or SystemExit as a bare except would.
            pass
        if not title_in_axis:
            fig.suptitle(figtitle, x=0.5, horizontalalignment='center')
        else:
            ax.text(0.1, 1, figtitle, verticalalignment='top',
                    transform=ax.transAxes)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    ax.set_xscale(xscale)
    ax.set_yscale(yscale)
    ax.grid(grid)
    ax.plot(freqs, pxx, **kwargs)
    return ax
def plotPSD(self, ax=None, x_time=True, scale_by_freq=True,
            Nfft=256 * 16 * 16, pad_to=None, xscale='log', yscale='log',
            grid=True, xlabel='time (s)', ylabel=None,
            figtitle='PSD station component date', title_in_axis=False,
            smooth=True, just_calculate=False, **kwargs):
    """
    Plot the power spectral density (PSD) of the trace.

    If ``self.stats.is_fft`` is set, ``self.data`` is used as the PSD as it
    is; otherwise the PSD is calculated with ``psd`` (see
    matplotlib.mlab.psd for ``Nfft``, ``scale_by_freq`` and ``pad_to``).

    :param ax: axes to plot into; a new figure is created if None
    :param x_time: if True the PSD is plotted against 1/frequency,
        otherwise against frequency
    :param scale_by_freq, Nfft, pad_to: passed to ``psd`` (``Nfft`` as
        ``NFFT``)
    :param xscale, yscale: scales of the axes
    :param grid: passed to ``ax.grid``
    :param xlabel, ylabel: axis labels, None for no label. If ``x_time`` is
        False and xlabel contains 'time' it is replaced by 'freq (Hz)'.
    :param figtitle: title or None. The words 'station', 'component',
        'time', 'date', 'year' and 'nfft' are replaced by the corresponding
        values where these are available.
    :param title_in_axis: write the title into the axes instead of using
        the figure title
    :param smooth: if it evaluates to True, the PSD is smoothed with
        ``util.smooth(pxx, smooth)``
    :param just_calculate: if True nothing is plotted and the tuple
        (pxx, freqs) is returned
    :param kwargs: passed to ``ax.plot``
    :return: the axes, or (pxx, freqs) if ``just_calculate`` is True
    """
    if self.stats.is_fft:
        pxx = self.data
        if 'freq_min' in self.stats:
            freqs = np.linspace(self.stats.freq_min, self.stats.freq_max,
                                self.stats.npts)
        else:
            freqs = self.fftfreq()
    else:
        pxx, freqs = psd(self.data, NFFT=Nfft, Fs=self.stats.sampling_rate,
                         scale_by_freq=scale_by_freq, pad_to=pad_to)
    if just_calculate:
        return pxx, freqs

    if x_time:
        # reverse both arrays so that the new x values (1/freq) are in the
        # reversed order of the frequencies
        pxx = pxx[::-1]
        freqs = 1. / freqs[::-1]
    elif xlabel is not None and 'time' in xlabel:
        # xlabel=None is a valid value (no label), so guard the test
        xlabel = 'freq (Hz)'
    if smooth:
        pxx = util.smooth(pxx, smooth)

    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
    else:
        fig = ax.get_figure()

    # ## print title
    if figtitle is not None:
        figtitle = figtitle.replace('station', self.stats.station)
        # [-1:] instead of [-1]: an empty channel code gives an empty
        # component instead of an IndexError
        figtitle = figtitle.replace('component', self.stats.channel[-1:])
        try:
            starttime = self.stats.starttime + 0.5
            figtitle = figtitle.replace('time', '%s' % starttime)
            figtitle = figtitle.replace('date', '%s' % starttime.date)
            figtitle = figtitle.replace('year', '%d' % starttime.year)
            figtitle = figtitle.replace('nfft', '%d' % Nfft)
        except Exception:
            # The remaining replacements are best effort: keep the title as
            # far as it was filled in, but do not swallow KeyboardInterrupt
            # or SystemExit as a bare except would.
            pass
        if not title_in_axis:
            fig.suptitle(figtitle, x=0.5, horizontalalignment='center')
        else:
            ax.text(0.1, 1, figtitle, verticalalignment='top',
                    transform=ax.transAxes)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    ax.set_xscale(xscale)
    ax.set_yscale(yscale)
    ax.grid(grid)
    ax.plot(freqs, pxx, **kwargs)
    return ax
def timeNorm(data, method=None, param=None, recursive=0):
    """
    Calculates normalized data. See Bensen et al.(2007)

    Except for '1bit' (which builds a new array) the methods modify ``data``
    in place. The result is passed through ``fillArray`` together with the
    mask of the input and ``fill_value=0.``.

    Method is a string. There are the following methods:

    1bit: reduce data to +1 if >0 and -1 if <0
    clip: clip data to +/- its standard deviation
    eventremoval: automatic event detection and removal - if a value is
        bigger than the threshold, the following values are set to zero.
        param: (threshold, number of samples (eg. 30min) to set to zero)
        default: (2000, 30 * 60 * 10)
    stalta: automatic event removing with recursive sta/lta trigger
        param: (sta samples, lta samples, trigger on, trigger off)
        default: (100 * 3, 100 * 10, 1.2, 1.0)
    runningmean: the data is normalized with the running average
        The width of the normalization window determines how much amplitude
        information is retained (N=1 -> 1bit normalization, N very big ->
        rescaled data). Half of the maximum period of the passband filter_
        works well.
        param: width of window (should be odd), default: 10 * 10
    runningmean_over_filtered: the data is normalized with the running
        average over the filtered data.
        A band pass filter_ between 20s and 100s period can remove local
        seismicity.
        param: (width of window in seconds, sampling rate, filter_, freq1,
            freq2)
            filter_: in ('band', 'low', 'high')
            if filter_ in ('low', 'high') only one frequency is needed
        default: (10, 10, 'band', 1 / 50., 1 / 15.)
    waterlevel: any amplitude above the waterlevel (multiple of the standard
        deviation) is down-weighted by a factor (or set to zero if the
        factor is 0). This procedure is repeated until all of the waveform
        data is under the water-level.
        param: (water-level factor, reducing factor), default: (6., 10.)
    waterlevel_rm: like waterlevel, but a sample is also reduced if the
        running mean of the absolute data exceeds the waterlevel, which is
        computed from the standard deviation of that running mean. The
        water-level factor is increased after every pass.
        param: (width of running mean window, water-level factor,
            reducing factor), default: (5 * 10, 4., 10.)
    waterlevel_env: like waterlevel, but the envelope of the data is compared
        with the waterlevel. Stops when less than 0.05% of the samples are
        affected or after 20 passes.
        param: (water-level factor, reducing factor), default: (4., 10.)
            Two more entries (accumulated fraction of reduced samples, pass
            counter) are appended internally for the recursion.
    waterlevel_env2: single pass; the waterlevel is estimated from the
        standard deviation of the envelope in short windows spread over the
        data. With a reducing factor of 0 the samples are masked instead of
        being set to zero.
        param: (water-level factor, reducing factor), default: (4., 10.)

    :param data: array or masked array
    :param method: one of the strings above or None (no normalization)
    :param param: parameters of the method (see above), None for defaults
    :param recursive: recursion depth, only used by the method itself
    :return: result of ``fillArray(data, mask=mask, fill_value=0.)``
    :raises ValueError: for an unknown method or filter type
    """
    mask = np.ma.getmask(data)
    if method == '1bit':
        data = np.sign(data)
    elif method == 'clip':
        std = np.std(data)
        data[data > std] = std
        data[data < -std] = -std
    elif method == 'eventremoval':
        if param is None:
            # remove 30 min (at 10Hz) after events if data is bigger than 2000
            param = (2000, 30 * 60 * 10)
        clip = np.nonzero(abs(data) >= param[0])[0]
        if len(clip) > 0:
            clip = clip[0]
            index = min(clip + param[1], len(data))
            data[clip:index] = 0
            if index < len(data):
                # look for further events behind the zeroed window
                data[index:] = timeNorm(data[index:], method=method,
                                        param=param)
    elif method == 'stalta':
        if param is None:
            # STA: 3s at 100Hz, LTA: 10s, trigger on: 1.2, trigger off:1.0
            param = (100 * 3, 100 * 10, 1.2, 1.0)
        cft = obspy.signal.trigger.recSTALTA(data, param[0], param[1])
        trg = obspy.signal.trigger.triggerOnset(cft, param[2], param[3])
        for on, off in trg:
            data[on:off] = 0
    elif method == 'runningmean':
        if param is None:
            # default window: 10 * 10 samples
            param = 10 * 10
        smoothed = smooth(np.abs(data), param)
        data /= smoothed
    elif method == 'runningmean_over_filtered':
        if param is None:
            # smooth over 10s at 10Hz over bandpassed data
            param = (10, 10, 'band', 1 / 50., 1 / 15.)
        sr = param[1]
        over = int(param[0] * sr)
        filter_ = param[2]
        if filter_ == 'band':
            data2 = obspy.signal.bandpass(data, param[3], param[4], sr)
        elif filter_ == 'low':
            data2 = obspy.signal.lowpass(data, param[3], sr)
        elif filter_ == 'high':
            data2 = obspy.signal.highpass(data, param[3], sr)
        else:
            raise ValueError("filter_ should be in ('band', 'high', 'low')")
        data /= smooth(np.abs(data2), over)
    elif method == 'waterlevel':
        if param is None:
            # data above 6*rms is recursively reduced by a factor of 10
            param = (6., 10.)
        waterlevel = param[0] * np.std(data)
        indices = np.abs(data) > waterlevel
        if np.any(indices):
            if param[1] == 0:
                data[indices] = 0
            else:
                data[indices] /= param[1]
            data = timeNorm(data, method=method, param=param,
                            recursive=recursive + 1)
    elif method == 'waterlevel_rm':
        if param is None:
            # running mean over 5s at 10Hz data
            # data above 4*rms is recursively reduced by a factor of 10
            param = (5 * 10, 4., 10.)
        running_mean = smooth(np.abs(data), param[0])
        waterlevel = param[1] * np.std(running_mean)
        # '+' on boolean arrays is a logical or
        indices = (running_mean > waterlevel) + (np.abs(data) > waterlevel)
        if np.any(indices):
            param = list(param)
            frac_zeros = 1. * np.count_nonzero(indices) / len(data)
            if param[2] == 0:
                data[indices] = 0
                param[1] *= (1 + frac_zeros)
            else:
                data[indices] /= param[2]
                # 1. / ... : avoid integer division for integer factors
                param[1] *= (1 + frac_zeros * (1 - 1. / param[2]))
            # progress output; same text as "print a, b, c" but also valid
            # syntax for the print function
            print('%s %s %s' % (recursive, frac_zeros, waterlevel))
            data = timeNorm(data, method=method, param=param,
                            recursive=recursive + 1)
    elif method == 'waterlevel_env':
        if param is None:
            # data above 4*rms is recursively reduced by a factor of 10
            param = (4., 10.)
        param = list(param)
        if len(param) == 2:
            # recursion state: param[2] = accumulated fraction of reduced
            # samples, param[3] = number of passes done so far
            param.append(0)
            param.append(0)
        env = obspy.signal.cpxtrace.envelope(data)[1][:len(data)]
        # correct std because of zeros
        waterlevel = param[0] * np.std(env) / (1 - param[2])
        indices = env > waterlevel
        frac_zeros = 1. * np.count_nonzero(indices) / len(data)
        if np.any(indices) and frac_zeros > 0.0005 and param[3] < 20:
            if param[1] == 0:
                data[indices] = 0
            else:
                # divide by the reducing factor param[1]; param[2] is the
                # accumulated fraction (0 in the first pass)
                data[indices] /= param[1]
            print('%s %s %s %s' % (param[3], frac_zeros, param[2],
                                   waterlevel))
            param[2] += frac_zeros
            param[3] += 1
            data = timeNorm(data, method=method, param=param)
    elif method == 'waterlevel_env2':
        if param is None:
            # data above 4*rms is reduced by a factor of 10
            param = (4., 10.)
        N = len(data)
        env = obspy.signal.cpxtrace.envelope(data)[1][:N]
        # np.ma.getmask() returns np.ma.nomask (never the Python object
        # False) for unmasked data
        if mask is not np.ma.nomask:
            env[mask] = 0.
        num_stds = 96  # 24*4 =^ every 15min
        if N < 86400:  # 24*3600
            num_stds = N // 900
        # NOTE: for N < 900 num_stds is 0 and the next line raises
        # ZeroDivisionError
        len_parts = N // num_stds  # N//96 = N//24//4 =^ 15min
        len_stds = len_parts // 15  # len_parts//15 =^ 1min
        stds = np.array([np.std(env[i:i + len_stds])
                         for i in np.arange(num_stds) * len_parts])
        if np.min(stds) == 0:
            stds = stds[stds != 0.]
            num_stds = len(stds)
        # drop the smallest and largest values; (-num_stds) // 15 rounds
        # towards -inf, so up to one more value is cut at the upper end
        stds = np.sort(stds)[num_stds // 15:-num_stds // 15]
        stds = stds[stds < np.min(stds) * 2.]
        waterlevel = param[0] * np.mean(stds)
        indices = env > waterlevel
        if np.any(indices):
            if param[1] == 0:
                # not setting values to zero but masking them
                # -> they will stay zero after spectral whitening
                # and 1bit normalization
                mask = np.ma.mask_or(mask, indices)
            else:
                # param has only two entries here: the reducing factor is
                # param[1]
                data[indices] /= param[1]
    elif method is not None:
        raise ValueError('The method passed to timeNorm() is not known.')
    return fillArray(data, mask=mask, fill_value=0.)