def test_noweight_nointercept(self):
    """Unweighted regression with the fit forced through the origin.

    The call must yield exactly two values, and they must match the stored
    reference numbers within ``assert_allclose`` default tolerances.
    """
    expected = [1.9778043606670241, 0.20639881421917514]
    fitted = linear_regression(self.x, self.y, intercept_origin=True)
    np.testing.assert_equal(len(fitted), 2)
    np.testing.assert_allclose(fitted, expected)
def test_noweight_nointercept(self):
    """Unweighted regression with the fit forced through the origin.

    The call must yield exactly two values, and they must match the stored
    reference numbers within ``assert_allclose`` default tolerances.
    """
    expected = [1.9778043606670241, 0.20639881421917514]
    fitted = linear_regression(self.x, self.y, intercept_origin=True)
    np.testing.assert_equal(len(fitted), 2)
    np.testing.assert_allclose(fitted, expected)
def test_noweight_intercept(self):
    """Unweighted regression with a free intercept.

    With ``intercept_origin=False`` the call must yield four values that
    match the stored reference numbers.
    """
    expected = [
        1.0161734373705231,
        6.090329180877835,
        0.053630481620317534,
        0.28630842447712868,
    ]
    fitted = linear_regression(self.x, self.y, intercept_origin=False)
    np.testing.assert_equal(len(fitted), 4)
    np.testing.assert_allclose(fitted, expected)
def test_weight_nointercept(self):
    """Weighted regression with the fit forced through the origin.

    ``self.weights`` is passed as the third positional argument; the call
    must yield two values matching the stored reference numbers.
    """
    expected = [1.8461448748925857, 0.075572144774706959]
    fitted = linear_regression(
        self.x, self.y, self.weights, intercept_origin=True)
    np.testing.assert_equal(len(fitted), 2)
    np.testing.assert_allclose(fitted, expected)
def test_weight_intercept(self):
    """Weighted regression with a free intercept.

    ``self.weights`` is passed as the third positional argument; with
    ``intercept_origin=False`` the call must yield four values matching the
    stored reference numbers.
    """
    expected = [
        1.103354807117634,
        5.5337860049233898,
        0.076215945728448836,
        0.54474791667622224,
    ]
    fitted = linear_regression(
        self.x, self.y, self.weights, intercept_origin=False)
    np.testing.assert_equal(len(fitted), 4)
    np.testing.assert_allclose(fitted, expected)
def get_ph_misfit(period, freqmin, freqmax, stacode1, stacode2, vel,
                  indir='/work3/wang/JdF/Denoised_Stack'):
    """Phase misfit between the positive and negative lags of a correlogram.

    The cross-correlation ``<indir>/<stacode1>/COR_<stacode1>_<stacode2>.SAC``
    is split into its negative-lag half (time-reversed) and positive-lag
    half. Both halves are windowed around the expected arrival
    (``dist / vel`` +/- one ``period``), detrended, tapered and
    Fourier-transformed. The unwrapped phase of their cross-spectrum is then
    regressed against angular frequency with coherence-based weights.

    :param period: half-width of the analysis window around the arrival
        (same time unit as the SAC ``delta`` header).
    :param freqmin: lower bound of the frequency band used in the regression.
    :param freqmax: upper bound of the frequency band used in the regression.
    :param stacode1: first station code (also the sub-directory name).
    :param stacode2: second station code.
    :param vel: velocity used to predict the arrival time from the SAC
        ``dist`` header.
    :param indir: root directory holding the stacked correlograms.
    :returns: the slope returned by ``linear_regression``, or ``9999.9`` when
        the correlogram file does not exist.
    """
    file_name = 'COR_' + stacode1 + '_' + stacode2 + '.SAC'
    xcorr_file = '/'.join((indir, stacode1, file_name))
    if not os.path.isfile(xcorr_file):
        # Sentinel value for a missing correlogram.
        return 9999.9

    tr = obspy.core.read(xcorr_file)[0]
    sac_header = tr.stats.sac
    npts = sac_header.npts
    delta = sac_header.delta
    dist = sac_header.dist

    # Number of samples in each lag half, zero lag included in both.
    half = int((npts - 1) / 2) + 1
    arrival = dist / vel
    data_neg = tr.data[:half][::-1]
    data_pos = tr.data[half - 1:]

    # No band-pass filter is applied here; the band only restricts which
    # frequencies enter the regression further down.
    ind0 = max(0, int((arrival - 1 * period) / delta))
    ind1 = min(half, int((arrival + 1 * period) / delta))
    window_length = ind1 - ind0
    taper = cosine_taper(window_length, 0.85)

    ccp = scipy.signal.detrend(data_pos[ind0:ind1], type='linear')
    ccp *= taper
    ccn = scipy.signal.detrend(data_neg[ind0:ind1], type='linear')
    ccn *= taper

    # FFT length: 2 ** bit_length of the window length.
    ns = 1 << window_length.bit_length()
    half_ns = ns // 2
    fpos = scipy.fftpack.fft(ccp, n=ns)[:half_ns]
    fneg = scipy.fftpack.fft(ccn, n=ns)[:half_ns]
    fpos2 = np.real(fpos) ** 2 + np.imag(fpos) ** 2
    fneg2 = np.real(fneg) ** 2 + np.imag(fneg) ** 2

    # Cross-spectrum of the two lag sides and the amplitude spectra.
    X = fpos * (fneg.conj())
    dpos = np.sqrt(fpos2)
    dneg = np.sqrt(fneg2)
    dcs = np.abs(X)

    freq_vec = scipy.fftpack.fftfreq(len(X) * 2, delta)[:half_ns]
    in_band = np.logical_and(freq_vec >= freqmin, freq_vec <= freqmax)
    index_range = np.argwhere(in_band)

    # Coherence, left at zero wherever either amplitude spectrum vanishes.
    coh = np.zeros(len(dcs)).astype('complex')
    nonzero = np.logical_and(np.abs(dpos) > 0, np.abs(dneg) > 0)
    valids = np.argwhere(nonzero)
    coh[valids] = dcs[valids] / (dpos[valids] * dneg[valids])
    coh[coh > (1. + 0j)] = 1.0 + 0j

    # Regression weights built from coherence and cross-spectral amplitude.
    band_coh = coh[index_range]
    w = 1. / (1. / (band_coh ** 2) - 1.)
    w[band_coh >= 0.99] = 1. / (1. / 0.9801 - 1.)
    w = np.sqrt(w * np.sqrt(dcs[index_range]))
    w = np.real(w)

    # Angular frequency and unwrapped phase inside the band.
    v = np.real(freq_vec[index_range]) * 2 * np.pi
    phi = np.angle(X)
    phi[0] = 0.
    phi = np.unwrap(phi)
    phi = phi[index_range]

    m, _ = linear_regression(v.flatten(), phi.flatten(), w.flatten())
    return m
def test_noweight_intercept(self):
    """Unweighted regression with a free intercept.

    With ``intercept_origin=False`` the call must yield four values that
    match the stored reference numbers.
    """
    expected = [
        1.0161734373705231,
        6.090329180877835,
        0.053630481620317534,
        0.28630842447712868,
    ]
    fitted = linear_regression(self.x, self.y, intercept_origin=False)
    np.testing.assert_equal(len(fitted), 4)
    np.testing.assert_allclose(fitted, expected)
def test_weight_nointercept(self):
    """Weighted regression with the fit forced through the origin.

    ``self.weights`` is passed as the third positional argument; the call
    must yield two values matching the stored reference numbers.
    """
    expected = [1.8461448748925857, 0.075572144774706959]
    fitted = linear_regression(
        self.x, self.y, self.weights, intercept_origin=True)
    np.testing.assert_equal(len(fitted), 2)
    np.testing.assert_allclose(fitted, expected)
def test_weight_intercept(self):
    """Weighted regression with a free intercept.

    ``self.weights`` is passed as the third positional argument; with
    ``intercept_origin=False`` the call must yield four values matching the
    stored reference numbers.
    """
    expected = [
        1.103354807117634,
        5.5337860049233898,
        0.076215945728448836,
        0.54474791667622224,
    ]
    fitted = linear_regression(
        self.x, self.y, self.weights, intercept_origin=False)
    np.testing.assert_equal(len(fitted), 4)
    np.testing.assert_allclose(fitted, expected)
def main(interval=1, loglevel="INFO"):
    """Process every pending DTT job and export the dt/t regressions.

    For each batch of jobs, and for each filter / component / moving-stack
    combination, the per-pair MWCS text files of the job day are loaded from
    ``MWCS/<filter>/<mov>_DAYS/<components>/<pair>/<day>.txt``. Samples
    outside the configured lag window are neutralised (``err`` = 1,
    ``coh`` = 0). When more than one pair is available an extra "ALL" row is
    built from the weighted average of the accepted samples. Finally each
    row is fitted twice with ``linear_regression`` (free intercept and
    through the origin) and the slopes, intercepts and their errors are
    written to ``DTT/<filter>/<mov>_DAYS/<components>/<day>.txt``.

    :type interval: int or float
    :param interval: number of days passed as ``interval`` to
        ``updated_days_for_dates``.
    :type loglevel: str
    :param loglevel: log level handed to ``get_logger``.
    """
    # Logger configured to show the pid number in log records.
    logger = get_logger('msnoise.compute_dtt_child', loglevel, with_pid=True)
    logger.info('*** Starting: Compute DT/T ***')
    db = connect()
    params = get_params(db)

    start, end, datelist = build_movstack_datelist(db)

    # "mov_stack" is a comma-separated list of integers (possibly just one).
    mov_stack = get_config(db, "mov_stack")
    mov_stacks = [int(mi) for mi in mov_stack.split(',')]

    components_to_compute = get_components_to_compute(db)
    # NOTE(review): the returned value is never read below; the call is kept
    # in case it matters for the database session -- confirm before removing.
    updated_dtt = updated_days_for_dates(
        db, start, end, '%', jobtype='DTT', returndays=True,
        interval=datetime.timedelta(days=interval))

    # Pre-compute the inter-station distance of every pair, keyed by
    # "NET_STA_NET_STA"; a station paired with itself gets 0.
    interstations = {}
    for sta1, sta2 in get_station_pairs(db):
        s1 = "%s_%s" % (sta1.net, sta1.sta)
        s2 = "%s_%s" % (sta2.net, sta2.sta)
        if s1 == s2:
            interstations["%s_%s" % (s1, s2)] = 0.0
        else:
            interstations["%s_%s" % (s1, s2)] = get_interstation_distance(
                sta1, sta2, sta1.coordinates)

    filters = get_filters(db, all=False)
    while is_next_job(db, jobtype='DTT'):
        jobs = get_next_job(db, jobtype='DTT')
        for f in filters:
            filterid = int(f.ref)
            for components in components_to_compute:
                for mov_stack in mov_stacks:
                    logger.info('Loading mov=%i days for filter=%02i' %
                                (mov_stack, filterid))
                    first = True
                    for job in jobs:
                        netsta1, netsta2 = job.pair.split(':')
                        current = job.day
                        sta1 = netsta1.replace(".", "_")
                        sta2 = netsta2.replace(".", "_")
                        pair = "%s_%s" % (sta1, sta2)
                        day = os.path.join('MWCS', "%02i" % filterid,
                                           "%03i_DAYS" % mov_stack,
                                           components, pair,
                                           '%s.txt' % current)
                        dist = interstations[pair]
                        if dist == 0. and params.dtt_lag == "dynamic":
                            logger.debug('%s: Distance is Zero?!' % pair)
                        if os.path.isfile(day):
                            df = pd.read_csv(
                                day, delimiter=' ', header=None, index_col=0,
                                names=['t', 'dt', 'err', 'coh'])
                            tArray = df.index.values

                            # Inner (minimum) lag on each side: fixed, or
                            # derived from the distance and a velocity.
                            if params.dtt_lag == "static":
                                lmlag = -params.dtt_minlag
                                rmlag = params.dtt_minlag
                            else:
                                lmlag = -dist / params.dtt_v
                                rmlag = dist / params.dtt_v
                            # Outer (maximum) lag on each side.
                            lMlag = lmlag - params.dtt_width
                            rMlag = rmlag + params.dtt_width

                            left = (tArray >= lMlag) & (tArray <= lmlag)
                            right = (tArray >= rmlag) & (tArray <= rMlag)
                            if params.dtt_sides == "both":
                                tindex = np.where(left | right)[0]
                            elif params.dtt_sides == "left":
                                tindex = np.where(left)[0]
                            else:
                                tindex = np.where(right)[0]

                            # Positions outside the selected lag window are
                            # neutralised. `tmp` holds *positions*, so use
                            # .iloc: chained `df['err'][tmp] = ...` is
                            # label-based on a float index in recent pandas
                            # and is not reliable under copy-on-write.
                            tmp = np.setdiff1d(np.arange(len(tArray)), tindex)
                            df.iloc[tmp, df.columns.get_loc('err')] = 1.0
                            df.iloc[tmp, df.columns.get_loc('coh')] = 0.0

                            if first:
                                tArray = df.index.values
                                dtArray = df['dt']
                                errArray = df['err']
                                cohArray = df['coh']
                                pairArray = [pair, ]
                                first = False
                            else:
                                dtArray = np.vstack((dtArray, df['dt']))
                                errArray = np.vstack((errArray, df['err']))
                                cohArray = np.vstack((cohArray, df['coh']))
                                pairArray.append(pair)
                            del df
                        del day

                    if not first:
                        Dates = []
                        Pairs = []
                        M = []
                        EM = []
                        A = []
                        EA = []
                        M0 = []
                        EM0 = []
                        if len(pairArray) != 1:
                            # first stack all pairs to a ALL mean pair, using
                            # indexes of selected values:
                            new_dtArray = np.zeros(len(tArray))
                            new_errArray = np.zeros(len(tArray)) + 9999
                            new_cohArray = np.zeros(len(tArray))
                            for i in range(len(tArray)):
                                cohindex = np.where(
                                    cohArray[:, i] >= params.dtt_mincoh)[0]
                                errindex = np.where(
                                    errArray[:, i] <= params.dtt_maxerr)[0]
                                dtindex = np.where(
                                    np.abs(dtArray[:, i]) <= params.dtt_maxdt)[0]
                                index = np.intersect1d(cohindex, errindex)
                                index = np.intersect1d(index, dtindex)
                                wavg, wstd = wavg_wstd(
                                    dtArray[:, i][index],
                                    errArray[:, i][index])
                                new_dtArray[i] = wavg
                                new_errArray[i] = wstd
                                new_cohArray[i] = 1.0
                            dtArray = np.vstack((dtArray, new_dtArray))
                            errArray = np.vstack((errArray, new_errArray))
                            cohArray = np.vstack((cohArray, new_cohArray))
                            pairArray.append("ALL")
                            del new_cohArray, new_dtArray, new_errArray
                            del cohindex, errindex, dtindex, wavg, wstd

                            # then stack selected pairs to GROUPS.
                            # `groups` is empty here, so this loop body never
                            # runs; it is kept as the hook for group stacks.
                            groups = {}
                            npairs = len(pairArray) - 1
                            for group in groups:
                                new_dtArray = np.zeros(len(tArray))
                                new_errArray = np.zeros(len(tArray)) + 9999
                                new_cohArray = np.zeros(len(tArray))
                                pairindex = []
                                for j, pair in enumerate(pairArray[:npairs]):
                                    net1, sta1, net2, sta2 = pair.split('_')
                                    if sta1 in groups[group] and \
                                            sta2 in groups[group]:
                                        pairindex.append(j)
                                pairindex = np.array(pairindex)
                                for i in range(len(tArray)):
                                    cohindex = np.where(
                                        cohArray[:, i] >= params.dtt_mincoh)[0]
                                    errindex = np.where(
                                        errArray[:, i] <= params.dtt_maxerr)[0]
                                    dtindex = np.where(
                                        np.abs(dtArray[:, i]) <= params.dtt_maxdt)[0]
                                    index = np.intersect1d(cohindex, errindex)
                                    index = np.intersect1d(index, dtindex)
                                    index = np.intersect1d(index, pairindex)
                                    wavg, wstd = wavg_wstd(
                                        dtArray[:, i][index],
                                        errArray[:, i][index])
                                    new_dtArray[i] = wavg
                                    new_errArray[i] = wstd
                                    new_cohArray[i] = 1.0
                                dtArray = np.vstack((dtArray, new_dtArray))
                                errArray = np.vstack((errArray, new_errArray))
                                cohArray = np.vstack((cohArray, new_cohArray))
                                pairArray.append(group)
                                del new_cohArray, new_dtArray, new_errArray
                                del cohindex, errindex, dtindex, wavg, wstd
                            # END OF GROUP HANDLING

                        # then process all pairs + the ALL
                        if len(dtArray.shape) == 1:
                            # Only one pair: the arrays are still pandas
                            # Series, turn them into 1-row 2-D arrays.
                            dtArray = dtArray.values.reshape(
                                (1, dtArray.shape[0]))
                            cohArray = cohArray.values.reshape(
                                (1, cohArray.shape[0]))
                            errArray = errArray.values.reshape(
                                (1, errArray.shape[0]))

                        for i, pair in enumerate(pairArray):
                            # Keep only samples passing the coherence, error
                            # and maximum-delay thresholds.
                            cohindex = np.where(
                                cohArray[i] >= params.dtt_mincoh)[0]
                            errindex = np.where(
                                errArray[i] <= params.dtt_maxerr)[0]
                            dtindex = np.where(
                                np.abs(dtArray[i]) <= params.dtt_maxdt)[0]
                            index = np.intersect1d(cohindex, errindex)
                            index = np.intersect1d(index, dtindex)

                            # Weights are the inverse errors; non-finite
                            # weights (zero error) fall back to 1.
                            w = 1.0 / errArray[i][index]
                            w[~np.isfinite(w)] = 1.0
                            VecXfilt = tArray[index]
                            VecYfilt = dtArray[i][index]
                            if len(VecYfilt) >= 2:
                                m, a, em, ea = linear_regression(
                                    VecXfilt, VecYfilt, w,
                                    intercept_origin=False)
                                m0, em0 = linear_regression(
                                    VecXfilt, VecYfilt, w,
                                    intercept_origin=True)
                                M.append(m)
                                EM.append(em)
                                A.append(a)
                                EA.append(ea)
                                M0.append(m0)
                                EM0.append(em0)
                                Dates.append(current)
                                Pairs.append(pair)
                                del m, a, em, ea, m0, em0

                            del VecXfilt, VecYfilt, w
                            del index, cohindex, errindex, dtindex

                        logger.debug(
                            "%s: exporting: %i pairs" % (current,
                                                         len(pairArray)))
                        df = pd.DataFrame(
                            {'Pairs': Pairs, 'M': M, 'EM': EM, 'A': A,
                             'EA': EA, 'M0': M0, 'EM0': EM0},
                            index=pd.DatetimeIndex(Dates))
                        # Needs to be changed !
                        output = os.path.join(
                            'DTT', "%02i" % filterid, "%03i_DAYS" % mov_stack,
                            components)
                        if not os.path.isdir(output):
                            os.makedirs(output)
                        df.to_csv(
                            os.path.join(output, '%s.txt' % current),
                            index_label='Date')
                        del df, M, EM, A, EA, M0, EM0, Pairs, Dates
                        del tArray, dtArray, errArray, cohArray, pairArray
                        del output

        # THIS SHOULD BE IN THE API
        massive_update_job(db, jobs, "D")

    logger.info('*** Finished: Compute DT/T ***')
def mwcs(current, reference, freqmin, freqmax, df, tmin, window_length, step,
         smoothing_half_win=5):
    r"""Measure time delays between two series with moving-window cross-spectra.

    The `current` time series is compared to the `reference`.
    Both time series are sliced in several overlapping windows.
    Each slice is linearly detrended and cosine-tapered (85% taper) before
    being Fourier-transformed to the frequency domain.
    :math:`F_{cur}(\nu)` and :math:`F_{ref}(\nu)` are the first halves of the
    Hermitian symmetric Fourier-transformed segments. The cross-spectrum
    :math:`X(\nu)` is defined as
    :math:`X(\nu) = F_{ref}(\nu) F_{cur}^*(\nu)`

    in which :math:`{}^*` denotes the complex conjugation.
    :math:`X(\nu)` is then smoothed by convolution with a Hanning window.
    The similarity of the two time-series is assessed using the cross-coherency
    between energy densities in the frequency domain:

    :math:`C(\nu) = \frac{|\overline{X(\nu)}|}{\sqrt{|\overline{F_{ref}(\nu)|^2} |\overline{F_{cur}(\nu)|^2}}}`

    in which the over-line here represents the smoothing of the energy spectra
    for :math:`F_{ref}` and :math:`F_{cur}` and of the spectrum of :math:`X`.
    The mean coherence for the segment is defined as the mean of
    :math:`C(\nu)` in the frequency range of interest. The time-delay between
    the two cross correlations is found in the unwrapped phase,
    :math:`\phi(\nu)`, of the cross spectrum and is linearly proportional to
    frequency:

    :math:`\phi_j = m. \nu_j, m = 2 \pi \delta t`

    The time shift for each window between two signals is the slope :math:`m`
    of a weighted linear regression of the samples within the frequency band
    of interest. In this implementation the regression is run against the
    angular frequency :math:`2 \pi \nu`, so the fitted slope is stored
    directly as the delay. The weights are those introduced by [Clarke2011]_,
    which incorporate both the cross-spectral amplitude and cross-coherence,
    unlike [Poupinet1984]_. The errors are estimated using the weights (thus
    the coherence) and the squared misfit to the modelled slope:

    :math:`e_m = \sqrt{\sum_j{(\frac{w_j \nu_j}{\sum_i{w_i \nu_i^2}})^2}\sigma_{\phi}^2}`

    where :math:`w` are weights, :math:`\nu` are the frequencies and
    :math:`\sigma_{\phi}^2` is the squared misfit of the data to the modelled
    slope and is calculated as
    :math:`\sigma_{\phi}^2 = \frac{\sum_j(\phi_j - m \nu_j)^2}{N-1}`

    The output of this process is a table containing, for each moving window:
    the central time lag, the measured delay, its error and the mean coherence
    of the segment.

    .. warning::

        The time series will not be filtered before computing the
        cross-spectrum! They should be band-pass filtered around the
        `freqmin`-`freqmax` band of interest beforehand.

    :type current: :class:`numpy.ndarray`
    :param current: The "Current" timeseries
    :type reference: :class:`numpy.ndarray`
    :param reference: The "Reference" timeseries
    :type freqmin: float
    :param freqmin: The lower frequency bound to compute the dephasing (in Hz)
    :type freqmax: float
    :param freqmax: The higher frequency bound to compute the dephasing (in Hz)
    :type df: float
    :param df: The sampling rate of the input timeseries (in Hz)
    :type tmin: float
    :param tmin: The leftmost time lag (used to compute the "time lags array")
    :type window_length: float
    :param window_length: The moving window length (in seconds)
    :type step: float
    :param step: The step to jump for the moving window (in seconds)
    :type smoothing_half_win: int
    :param smoothing_half_win: If different from 0, defines the half length of
        the smoothing hanning window.

    :rtype: :class:`numpy.ndarray`
    :returns: [time_axis,delta_t,delta_err,delta_mcoh]. time_axis contains the
        central times of the windows. The three other columns contain dt, error
        and mean coherence for each window.
    :raises ValueError: if at least one window fits in `current` but
        ``int(step * df)`` is not positive, which would otherwise make the
        moving window loop forever.
    """
    delta_t = []
    delta_err = []
    delta_mcoh = []
    time_axis = []

    # `np.int` was removed in NumPy 1.24; the builtin truncates identically.
    window_length_samples = int(window_length * df)
    step_samples = int(step * df)

    from msnoise.api import nextpow2
    # FFT length: 4x the power of two given by nextpow2 for one window.
    padd = int(2 ** (nextpow2(window_length_samples) + 2))
    count = 0
    tp = cosine_taper(window_length_samples, 0.85)
    minind = 0
    maxind = window_length_samples

    if step_samples <= 0 and maxind <= len(current):
        raise ValueError(
            "step * df must be at least one sample, got step=%r, df=%r"
            % (step, df))

    while maxind <= len(current):
        cci = current[minind:(minind + window_length_samples)]
        cci = scipy.signal.detrend(cci, type='linear')
        cci *= tp

        cri = reference[minind:(minind + window_length_samples)]
        cri = scipy.signal.detrend(cri, type='linear')
        cri *= tp

        minind += step_samples
        maxind += step_samples

        fcur = scipy.fftpack.fft(cci, n=padd)[:padd // 2]
        fref = scipy.fftpack.fft(cri, n=padd)[:padd // 2]

        fcur2 = np.real(fcur) ** 2 + np.imag(fcur) ** 2
        fref2 = np.real(fref) ** 2 + np.imag(fref) ** 2

        # Calculate the cross-spectrum
        X = fref * (fcur.conj())
        if smoothing_half_win != 0:
            dcur = np.sqrt(smooth(fcur2, window='hanning',
                                  half_win=smoothing_half_win))
            dref = np.sqrt(smooth(fref2, window='hanning',
                                  half_win=smoothing_half_win))
            X = smooth(X, window='hanning',
                       half_win=smoothing_half_win)
        else:
            dcur = np.sqrt(fcur2)
            dref = np.sqrt(fref2)

        dcs = np.abs(X)

        # Find the values the frequency range of interest
        freq_vec = scipy.fftpack.fftfreq(len(X) * 2, 1. / df)[:padd // 2]
        index_range = np.argwhere(np.logical_and(freq_vec >= freqmin,
                                                 freq_vec <= freqmax))

        # Get Coherence and its mean value
        coh = getCoherence(dcs, dref, dcur)
        mcoh = np.mean(coh[index_range])

        # Get Weights; coherences >= 0.99 are capped so that the weight
        # stays finite as the coherence approaches 1.
        w = 1.0 / (1.0 / (coh[index_range] ** 2) - 1.0)
        w[coh[index_range] >= 0.99] = 1.0 / (1.0 / 0.9801 - 1.0)
        w = np.sqrt(w * np.sqrt(dcs[index_range]))
        w = np.real(w)

        # Frequency array (angular frequency):
        v = np.real(freq_vec[index_range]) * 2 * np.pi

        # Phase:
        phi = np.angle(X)
        phi[0] = 0.
        phi = np.unwrap(phi)
        phi = phi[index_range]

        # Calculate the slope with a weighted least square linear regression
        # forced through the origin
        # weights for the WLS must be the variance !
        m, _ = linear_regression(v.flatten(), phi.flatten(), w.flatten())

        delta_t.append(m)

        # Error on the slope from the weights and the squared misfit.
        e = np.sum((phi - m * v) ** 2) / (np.size(v) - 1)
        s2x2 = np.sum(v ** 2 * w ** 2)
        sx2 = np.sum(w * v ** 2)
        e = np.sqrt(e * s2x2 / sx2 ** 2)

        delta_err.append(e)
        delta_mcoh.append(np.real(mcoh))
        time_axis.append(tmin + window_length / 2. + count * step)
        count += 1

    if maxind > len(current) + step * df:
        logging.warning("The last window was too small, but was computed")

    return np.array([time_axis, delta_t, delta_err, delta_mcoh]).T
def mwcs(current, reference, freqmin, freqmax, df, tmin, window_length, step,
         smoothing_half_win=5):
    r"""Measure time delays between two series with moving-window cross-spectra.

    The `current` time series is compared to the `reference`.
    Both time series are sliced in several overlapping windows.
    Each slice is linearly detrended and cosine-tapered (85% taper) before
    being Fourier-transformed to the frequency domain.
    :math:`F_{cur}(\nu)` and :math:`F_{ref}(\nu)` are the first halves of the
    Hermitian symmetric Fourier-transformed segments. The cross-spectrum
    :math:`X(\nu)` is defined as
    :math:`X(\nu) = F_{ref}(\nu) F_{cur}^*(\nu)`

    in which :math:`{}^*` denotes the complex conjugation.
    :math:`X(\nu)` is then smoothed by convolution with a Hanning window.
    The similarity of the two time-series is assessed using the cross-coherency
    between energy densities in the frequency domain:

    :math:`C(\nu) = \frac{|\overline{X(\nu)}|}{\sqrt{|\overline{F_{ref}(\nu)|^2} |\overline{F_{cur}(\nu)|^2}}}`

    in which the over-line here represents the smoothing of the energy spectra
    for :math:`F_{ref}` and :math:`F_{cur}` and of the spectrum of :math:`X`.
    The mean coherence for the segment is defined as the mean of
    :math:`C(\nu)` in the frequency range of interest. The time-delay between
    the two cross correlations is found in the unwrapped phase,
    :math:`\phi(\nu)`, of the cross spectrum and is linearly proportional to
    frequency:

    :math:`\phi_j = m. \nu_j, m = 2 \pi \delta t`

    The time shift for each window between two signals is the slope :math:`m`
    of a weighted linear regression of the samples within the frequency band
    of interest. In this implementation the regression is run against the
    angular frequency :math:`2 \pi \nu`, so the fitted slope is stored
    directly as the delay. The weights are those introduced by [Clarke2011]_,
    which incorporate both the cross-spectral amplitude and cross-coherence,
    unlike [Poupinet1984]_. The errors are estimated using the weights (thus
    the coherence) and the squared misfit to the modelled slope:

    :math:`e_m = \sqrt{\sum_j{(\frac{w_j \nu_j}{\sum_i{w_i \nu_i^2}})^2}\sigma_{\phi}^2}`

    where :math:`w` are weights, :math:`\nu` are the frequencies and
    :math:`\sigma_{\phi}^2` is the squared misfit of the data to the modelled
    slope and is calculated as
    :math:`\sigma_{\phi}^2 = \frac{\sum_j(\phi_j - m \nu_j)^2}{N-1}`

    The output of this process is a table containing, for each moving window:
    the central time lag, the measured delay, its error and the mean coherence
    of the segment.

    .. warning::

        The time series will not be filtered before computing the
        cross-spectrum! They should be band-pass filtered around the
        `freqmin`-`freqmax` band of interest beforehand.

    :type current: :class:`numpy.ndarray`
    :param current: The "Current" timeseries
    :type reference: :class:`numpy.ndarray`
    :param reference: The "Reference" timeseries
    :type freqmin: float
    :param freqmin: The lower frequency bound to compute the dephasing (in Hz)
    :type freqmax: float
    :param freqmax: The higher frequency bound to compute the dephasing (in Hz)
    :type df: float
    :param df: The sampling rate of the input timeseries (in Hz)
    :type tmin: float
    :param tmin: The leftmost time lag (used to compute the "time lags array")
    :type window_length: float
    :param window_length: The moving window length (in seconds)
    :type step: float
    :param step: The step to jump for the moving window (in seconds)
    :type smoothing_half_win: int
    :param smoothing_half_win: If different from 0, defines the half length of
        the smoothing hanning window.

    :rtype: :class:`numpy.ndarray`
    :returns: [time_axis,delta_t,delta_err,delta_mcoh]. time_axis contains the
        central times of the windows. The three other columns contain dt, error
        and mean coherence for each window.
    :raises ValueError: if at least one window fits in `current` but
        ``int(step * df)`` is not positive, which would otherwise make the
        moving window loop forever.
    """
    delta_t = []
    delta_err = []
    delta_mcoh = []
    time_axis = []

    # `np.int` was removed in NumPy 1.24; the builtin truncates identically.
    window_length_samples = int(window_length * df)
    step_samples = int(step * df)

    from msnoise.api import nextpow2
    # FFT length: 4x the power of two given by nextpow2 for one window.
    padd = int(2 ** (nextpow2(window_length_samples) + 2))
    count = 0
    tp = cosine_taper(window_length_samples, 0.85)
    minind = 0
    maxind = window_length_samples

    if step_samples <= 0 and maxind <= len(current):
        raise ValueError(
            "step * df must be at least one sample, got step=%r, df=%r"
            % (step, df))

    while maxind <= len(current):
        cci = current[minind:(minind + window_length_samples)]
        cci = scipy.signal.detrend(cci, type='linear')
        cci *= tp

        cri = reference[minind:(minind + window_length_samples)]
        cri = scipy.signal.detrend(cri, type='linear')
        cri *= tp

        minind += step_samples
        maxind += step_samples

        fcur = scipy.fftpack.fft(cci, n=padd)[:padd // 2]
        fref = scipy.fftpack.fft(cri, n=padd)[:padd // 2]

        fcur2 = np.real(fcur) ** 2 + np.imag(fcur) ** 2
        fref2 = np.real(fref) ** 2 + np.imag(fref) ** 2

        # Calculate the cross-spectrum
        X = fref * (fcur.conj())
        if smoothing_half_win != 0:
            dcur = np.sqrt(smooth(fcur2, window='hanning',
                                  half_win=smoothing_half_win))
            dref = np.sqrt(smooth(fref2, window='hanning',
                                  half_win=smoothing_half_win))
            X = smooth(X, window='hanning',
                       half_win=smoothing_half_win)
        else:
            dcur = np.sqrt(fcur2)
            dref = np.sqrt(fref2)

        dcs = np.abs(X)

        # Find the values the frequency range of interest
        freq_vec = scipy.fftpack.fftfreq(len(X) * 2, 1. / df)[:padd // 2]
        index_range = np.argwhere(np.logical_and(freq_vec >= freqmin,
                                                 freq_vec <= freqmax))

        # Get Coherence and its mean value
        coh = getCoherence(dcs, dref, dcur)
        mcoh = np.mean(coh[index_range])

        # Get Weights; coherences >= 0.99 are capped so that the weight
        # stays finite as the coherence approaches 1.
        w = 1.0 / (1.0 / (coh[index_range] ** 2) - 1.0)
        w[coh[index_range] >= 0.99] = 1.0 / (1.0 / 0.9801 - 1.0)
        w = np.sqrt(w * np.sqrt(dcs[index_range]))
        w = np.real(w)

        # Frequency array (angular frequency):
        v = np.real(freq_vec[index_range]) * 2 * np.pi

        # Phase:
        phi = np.angle(X)
        phi[0] = 0.
        phi = np.unwrap(phi)
        phi = phi[index_range]

        # Calculate the slope with a weighted least square linear regression
        # forced through the origin
        # weights for the WLS must be the variance !
        m, _ = linear_regression(v.flatten(), phi.flatten(), w.flatten())

        delta_t.append(m)

        # Error on the slope from the weights and the squared misfit.
        e = np.sum((phi - m * v) ** 2) / (np.size(v) - 1)
        s2x2 = np.sum(v ** 2 * w ** 2)
        sx2 = np.sum(w * v ** 2)
        e = np.sqrt(e * s2x2 / sx2 ** 2)

        delta_err.append(e)
        delta_mcoh.append(np.real(mcoh))
        time_axis.append(tmin + window_length / 2. + count * step)
        count += 1

    if maxind > len(current) + step * df:
        logging.warning("The last window was too small, but was computed")

    return np.array([time_axis, delta_t, delta_err, delta_mcoh]).T
def main(loglevel="INFO"):
    """Process every pending MWCS job, all days of a pair at once.

    For each job batch (one station pair, several days) and for each
    filter / component / moving-stack combination, the stacked correlations
    are read from an HDF5 file, restricted to the job days and compared to
    the pair's reference with a moving-window cross-spectrum. The per-window
    delay (``M``), its error (``EM``) and the mean coherence (``MCOH``) of
    every day are gathered in one DataFrame whose columns are indexed by
    (window centre time, quantity) and written to an HDF5 file under the
    key ``"MWCS"``.

    :type loglevel: str
    :param loglevel: log level handed to ``get_logger``.
    """
    # Logger configured to show the pid number in log records.
    logger = get_logger('msnoise.compute_mwcs_child', loglevel, with_pid=True)
    logger.info('*** Starting: Compute MWCS ***')

    db = connect()

    # NOTE(review): `extension` and `maxlag` are not used further down
    # (`params.maxlag` is used instead); kept so that the configuration is
    # still read and validated exactly as before.
    export_format = get_config(db, 'export_format')
    if export_format == "BOTH":
        extension = ".MSEED"
    else:
        extension = "." + export_format

    # "mov_stack" is a comma-separated list of integers (possibly just one).
    mov_stack = get_config(db, "mov_stack")
    mov_stacks = [int(mi) for mi in mov_stack.split(',')]

    goal_sampling_rate = float(get_config(db, "cc_sampling_rate"))
    maxlag = float(get_config(db, "maxlag"))
    params = get_params(db)

    filters = get_filters(db, all=False)
    # Random start-up delay, presumably to desynchronise parallel workers.
    time.sleep(np.random.random() * 5)
    smoothing_half_win = 5
    hanningwindow = get_window("hanning", smoothing_half_win)
    hanningwindow_f64 = hanningwindow.astype(np.float64)

    while is_dtt_next_job(db, flag='T', jobtype='MWCS'):
        # TODO would it be possible to make the next 8 lines in the API ?
        jobs = get_dtt_next_job(db, flag='T', jobtype='MWCS')

        if not len(jobs):
            # edge case, should only occur when is_next returns true, but
            # get_next receives no jobs (heavily parallelised calls).
            time.sleep(np.random.random())
            continue
        pair = jobs[0].pair
        refs, days = zip(*[[job.ref, job.day] for job in jobs])

        logger.info(
            "There are MWCS jobs for some days to recompute for %s" % pair)
        for f in filters:
            filterid = int(f.ref)
            freqmin = f.mwcs_low
            freqmax = f.mwcs_high
            for components in params.all_components:
                station1, station2 = pair.split(":")
                ref = get_ref(db, station1, station2, filterid, components,
                              params)
                if not len(ref):
                    continue
                ref = ref.data

                for mov_stack in mov_stacks:
                    output = []
                    # NOTE(review): the path uses literal backslashes, so it
                    # only denotes nested folders on Windows -- confirm
                    # against the step that writes these files before
                    # switching to os.path.join.
                    fn = r"STACKS2\%02i\%03i_DAYS\%s\%s_%s.h5" % (
                        filterid, mov_stack, components, station1, station2)
                    print("Reading %s" % fn)
                    data = pd.read_hdf(fn)
                    valid = data.index.intersection(pd.to_datetime(days))
                    data = data.loc[valid]
                    data = data.dropna()

                    # work on 2D mwcs:
                    # `np.int` was removed in NumPy 1.24; the builtin
                    # truncates identically.
                    window_length_samples = int(
                        f.mwcs_wlen * goal_sampling_rate)
                    step_samples = int(f.mwcs_step * goal_sampling_rate)
                    padd = int(2 ** (nextpow2(window_length_samples) + 2))
                    count = 0
                    tp = cosine_taper(window_length_samples, 0.85)
                    minind = 0
                    maxind = window_length_samples

                    freq_vec = sf.fftfreq(
                        padd, 1. / goal_sampling_rate)[:padd // 2]
                    # Find the values the frequency range of interest
                    index_range = np.argwhere(
                        np.logical_and(freq_vec >= freqmin,
                                       freq_vec <= freqmax)).flatten()

                    # One row per day, reused for every window.
                    cci = np.empty((data.shape[0], window_length_samples))

                    while maxind <= data.shape[1]:
                        cci[:] = data.iloc[
                            :, minind:(minind + window_length_samples)].values
                        scipy.signal.detrend(cci, type="linear", axis=1,
                                             overwrite_data=True)
                        for i in range(cci.shape[0]):
                            cci[i] *= tp

                        cri = ref[
                            minind:(minind + window_length_samples)].copy()
                        scipy.signal.detrend(cri, type='linear',
                                             overwrite_data=True)
                        cri *= tp

                        minind += step_samples
                        maxind += step_samples

                        fcur = sf.fft(cci, axis=1, n=padd)[:, :padd // 2]
                        fref = sf.fft(cri, n=padd)[:padd // 2]

                        fcur2 = np.real(fcur) ** 2 + np.imag(fcur) ** 2
                        fcur2 = fcur2.astype(np.float64)
                        fref2 = np.real(fref) ** 2 + np.imag(fref) ** 2
                        fref2 = fref2.astype(np.float64)

                        # Cross-spectrum of the reference with every day.
                        X = fref * fcur.conj()
                        if smoothing_half_win != 0:
                            for i in range(fcur2.shape[0]):
                                fcur2[i] = np.sqrt(
                                    scipy.signal.convolve(
                                        fcur2[i], hanningwindow_f64, "same"))
                            fref2 = np.sqrt(
                                scipy.signal.convolve(
                                    fref2, hanningwindow_f64, "same"))
                            for i in range(X.shape[0]):
                                X[i] = scipy.signal.convolve(
                                    X[i], hanningwindow, "same")
                        else:
                            # These are NumPy arrays, which have no
                            # `.apply`; take the square root directly.
                            fcur2 = np.sqrt(fcur2)
                            fref2 = np.sqrt(fref2)

                        dcs = np.abs(X)

                        # Get Coherence and its mean value
                        W = []
                        MCOH = []
                        for i in range(dcs.shape[0]):
                            coh = getCoherence(dcs[i, index_range],
                                               fref2[index_range],
                                               fcur2[i, index_range])
                            mcoh = np.mean(coh)
                            MCOH.append(np.real(mcoh))
                            # Get Weights; coherences >= 0.99 are capped so
                            # that the weight stays finite.
                            w = 1.0 / (1.0 / (coh ** 2) - 1.0)
                            w[coh >= 0.99] = 1.0 / (1.0 / 0.9801 - 1.0)
                            w = np.sqrt(w * np.sqrt(dcs[i][index_range]))
                            w = np.real(w)
                            W.append(w)
                        W = np.asarray(W)

                        # Frequency array (angular frequency):
                        v = np.real(freq_vec[index_range]) * 2 * np.pi

                        # Phase:
                        phi = np.angle(X)
                        phi = phi.astype(np.float64)
                        phi[:, 0] = 0.0
                        phi = np.unwrap(phi, axis=1)
                        phi = phi[:, index_range]

                        # Calculate the slope with a weighted least square
                        # linear regression forced through the origin
                        # weights for the WLS must be the variance !
                        result = np.array([
                            linear_regression(
                                v.flatten(),
                                phi[i].flatten(),
                                W[i].flatten(),
                            ) for i in range(phi.shape[0])
                        ])

                        M = result[:, 0]
                        # Error on the slope from the weights and the
                        # squared misfit, one value per day.
                        e = np.sum(
                            (phi - np.outer(M, v)) ** 2,
                            axis=1) / (len(v) - 1)
                        s2x2 = np.sum(v ** 2 * W ** 2, axis=1)
                        sx2 = np.sum(W * v ** 2, axis=1)
                        E = np.sqrt(e * s2x2 / sx2 ** 2)

                        # Centre time of the current window.
                        ti = (-params.maxlag + f.mwcs_wlen / 2.
                              + count * f.mwcs_step)

                        S = pd.DataFrame(np.array([M, E, MCOH]).T,
                                         index=data.index,
                                         columns=["M", "EM", "MCOH"])
                        S.columns = pd.MultiIndex.from_product(
                            [[ti], S.columns])
                        output.append(S)
                        count += 1

                        del fcur, fref, fcur2, fref2, result, cri
                        del X
                        del M, E, MCOH

                    output = pd.concat(output, axis=1)
                    fn = r"MWCS2\%02i\%03i_DAYS\%s\%s_%s.h5" % (
                        filterid, mov_stack, components, station1, station2)
                    if not os.path.isdir(os.path.split(fn)[0]):
                        os.makedirs(os.path.split(fn)[0])
                    output.to_hdf(fn, key="MWCS")
                    del output

    logger.info('*** Finished: Compute MWCS ***')
def dtw_dvv(ref, cur, para, maxLag, b, direction):
    """
    Dynamic time warping for dv/v estimation.

    PARAMETERS:
    ----------------
    ref : reference signal (np.array, size N)
    cur : current signal (np.array, size N)
    para: dict containing useful parameters about the data window and
        targeted frequency; the keys read here are 't' (time axis, array),
        'twin' (time window bounds) and 'dt' (sampling interval)
    maxLag : max number of points to search forward and backward.
        Suggest setting it larger if window is set larger.
    b : b-value to limit strain, which is to limit the maximum velocity
        perturbation. See equation 11 in (Mikesell et al. 2015)
    direction: direction to accumulate errors (1=forward, -1=backward)

    RETURNS:
    ------------------
    m0*100 : slope of the through-origin fit of the warping time shifts
        against time, times 100 (0 when there are not enough points).
        NOTE(review): the historical docstring called this "-m0"; the sign
        convention with respect to dv/v should be confirmed by callers.
    em0*100 : error of that slope, times 100 (0 when there are not enough
        points)
    dist : accumulated distance function returned by
        ``accumulateErrorFunction``

    Original by Di Yang
    Last modified by Dylan Mikesell (25 Feb. 2015)
    Translated to python by Tim Clements (17 Aug. 2018)
    """
    t = para['t']
    twin = para['twin']
    dt = para['dt']
    tmin = np.min(twin)
    tmax = np.max(twin)

    # Sample indices of the requested time window. `np.int` was removed in
    # NumPy 1.24; the builtin truncates identically.
    first_idx = int((tmin - t.min()) / dt) + 1
    last_idx = int((tmax - t.min()) / dt) + 1
    itvec = np.arange(first_idx, last_idx)
    tvec = t[itvec]

    # setup other parameters
    npts = len(ref)  # number of time samples

    # compute error function over lags, which is independent of strain
    # limit 'b'.
    err = computeErrorFunction(cur, ref, npts, maxLag)

    # direction to accumulate errors (1=forward, -1=backward)
    dist = accumulateErrorFunction(direction, err, npts, maxLag, b)
    stbar = backtrackDistanceFunction(-1 * direction, dist, err, -maxLag, b)
    stbarTime = stbar * dt  # convert from samples to time

    # cut the first and last 5% for better regression
    indx = np.where((tvec >= (0.05 * npts * dt + tmin)) &
                    (tvec <= (0.95 * npts * dt + tmin)))[0]

    # linear regression to get dv/v
    if npts > 2:
        # uniform weights
        w = np.ones(npts)
        m0, em0 = linear_regression(tvec.flatten()[indx],
                                    stbarTime.flatten()[indx],
                                    w.flatten()[indx],
                                    intercept_origin=True)
    else:
        print('not enough points to estimate dv/v for dtw')
        m0 = 0
        em0 = 0

    return m0 * 100, em0 * 100, dist
def wxs_dvv(ref, cur, allfreq, para, dj=1 / 12, s0=-1, J=-1, sig=False,
            wvn='morlet', unwrapflag=False):
    """
    Compute dt or dv/v in time and frequency domain from wavelet cross
    spectrum (wxs), for all frequencies in a range of interest.

    Parameters
    --------------
    ref: The "Reference" timeseries (numpy.ndarray)
    cur: The "Current" timeseries (numpy.ndarray)
    allfreq: boolean. True: one dv/v measurement per frequency inside the band;
        False: a single dv/v measurement for the whole band
    para: a dict containing freq/time info of the data matrix.
        Keys read here: 't', 'twin', 'freq' and 'dt'
    dj, s0, J, sig, wvn: forwarded unchanged to 'wct_modified'
    unwrapflag: True - unwrap phase delays. Default is False

    RETURNS:
    ------------------
    allfreq False -> (dvv*100, err*100): dv/v and its error in %
    allfreq True  -> (freq, dvv*100, err*100): selected frequencies with one
        dv/v value and error (in %) per frequency

    RAISES:
    ------------------
    ValueError if the requested band is empty or exceeds the highest
    frequency returned by the wavelet transform.

    Originally written by Tim Clements (1 March, 2019)
    Modified by Congcong Yuan (30 June, 2019) based on (Mao et al. 2019).
    Updated by Chengxin Jiang (10 Oct, 2019) to merge the functionality for
    measurements across all frequency and one freq range
    """
    # common variables
    t = para['t']
    twin = para['twin']
    freq = para['freq']
    dt = para['dt']
    tmin = np.min(twin)
    tmax = np.max(twin)
    fmin = np.min(freq)
    fmax = np.max(freq)
    # builtin int(): the np.int alias was removed in NumPy 1.24
    itvec = np.arange(int((tmin - t.min()) / dt) + 1,
                      int((tmax - t.min()) / dt) + 1)
    tvec = t[itvec]
    npts = len(tvec)

    # perform cross coherent analysis, modified from function 'wavelet.cwt'
    # (note: 'freq' and 'sig' are rebound to the values returned here)
    WCT, aWCT, coi, freq, sig = wct_modified(ref, cur, dt, dj=dj, s0=s0, J=J,
                                             sig=sig, wavelet=wvn,
                                             normalize=True)

    if unwrapflag:
        # axis=0, unwrap along time; axis=-1, unwrap along frequency
        phase = np.unwrap(aWCT, axis=-1)
    else:
        phase = aWCT

    # zero out data outside frequency band
    if (fmax > np.max(freq)) | (fmax <= fmin):
        raise ValueError('Abort: input frequency out of limits!')
    freq_indin = np.where((freq >= fmin) & (freq <= fmax))[0]

    # follow MWCS to do two steps of linear regression
    if not allfreq:
        delta_t_m = np.zeros(npts, dtype=np.float32)
        delta_t_unc = np.zeros(npts, dtype=np.float32)
        # assume the tvec is the time window to measure dt
        for it in range(npts):
            w = 1 / WCT[freq_indin, itvec[it]]
            w[~np.isfinite(w)] = 1.
            # step 1: phase versus angular frequency gives dt at this lag
            delta_t_m[it], delta_t_unc[it] = linear_regression(
                freq[freq_indin] * 2 * np.pi,
                phase[freq_indin, itvec[it]], w)

        # new weights for regression
        wWCT = WCT[:, itvec]
        w2 = 1 / np.mean(wWCT[freq_indin, ], axis=0)
        w2[~np.isfinite(w2)] = 1.

        # now use dt and t to get dv/v
        if len(w2) > 2:
            if not np.any(delta_t_m):
                # every dt is zero: no measurement. The original assigned NaN
                # here and then overwrote it with the regression result.
                dvv, err = np.nan, np.nan
            else:
                m, em = linear_regression(tvec, delta_t_m, w2,
                                          intercept_origin=True)
                dvv, err = -m, em
        else:
            print('not enough points to estimate dv/v for wts')
            dvv, err = np.nan, np.nan

        return dvv * 100, err * 100

    # convert phase directly to delta_t for all frequencies
    # normalize phase by (2*pi*frequency) to turn phase delay into time delay
    delta_t = phase / (2 * np.pi * freq[:, None])
    dvv, err = np.zeros(freq_indin.shape), np.zeros(freq_indin.shape)

    # loop through freq for linear regression
    for ii, ifreq in enumerate(freq_indin):
        if len(tvec) > 2:
            if not np.any(delta_t[ifreq]):
                # leaves the initial 0.0 for this frequency
                continue

            # how to better approach the uncertainty of delta_t
            w = 1 / WCT[ifreq, itvec]
            w[~np.isfinite(w)] = 1.0

            m, em = linear_regression(tvec, delta_t[ifreq, itvec], w,
                                      intercept_origin=True)
            dvv[ii], err[ii] = -m, em
        else:
            print('not enough points to estimate dv/v for wts')
            dvv[ii], err[ii] = np.nan, np.nan

    return freq[freq_indin], dvv * 100, err * 100
def main(interval=1, loglevel="INFO"):
    """Compute dt/t from the MWCS HDF5 outputs, one job batch at a time.

    For every pending 'DTT' job batch, every filter, component and moving
    stack, the MWCS table is read, the samples outside the configured lag
    window or failing the coherence/error thresholds are masked, and two
    weighted linear regressions (free intercept and through the origin) are
    fitted per row. The result is written to the matching "DTT2" HDF5 file.

    :param interval: number of days passed to ``updated_days_for_dates``.
    :param loglevel: log level given to ``get_logger``.
    """
    # logger configured to show the pid number in log records
    logger = get_logger('msnoise.compute_dtt_child', loglevel, with_pid=True)
    logger.info('*** Starting: Compute DT/T ***')
    db = connect()
    params = get_params(db)
    start, end, datelist = build_movstack_datelist(db)

    mov_stack = get_config(db, "mov_stack")
    if mov_stack.count(',') == 0:
        mov_stacks = [int(mov_stack), ]
    else:
        mov_stacks = [int(mi) for mi in mov_stack.split(',')]

    # NOTE(review): the two results below are not used in this function; the
    # calls are kept because their side effects cannot be checked from here.
    components_to_compute = get_components_to_compute(db)
    updated_dtt = updated_days_for_dates(
        db, start, end, '%', jobtype='DTT', returndays=True,
        interval=datetime.timedelta(days=interval))

    # interstation distance lookup, keyed "NET_STA_NET_STA"
    interstations = {}
    for sta1, sta2 in get_station_pairs(db):
        s1 = "%s_%s" % (sta1.net, sta1.sta)
        s2 = "%s_%s" % (sta2.net, sta2.sta)
        if s1 == s2:
            interstations["%s_%s" % (s1, s2)] = 0.0
        else:
            interstations["%s_%s" % (s1, s2)] = get_interstation_distance(
                sta1, sta2, sta1.coordinates)

    filters = get_filters(db, all=False)
    while is_dtt_next_job(db, flag='T', jobtype='DTT'):
        # TODO would it be possible to make the next 8 lines in the API ?
        jobs = get_dtt_next_job(db, flag='T', jobtype='DTT')

        if not len(jobs):
            # edge case, should only occur when is_next returns true, but
            # get_next receives no jobs (heavily parallelised calls).
            time.sleep(np.random.random())
            continue
        pair = jobs[0].pair

        netsta1, netsta2 = pair.split(':')
        # each side must be "NET.STA.LOC"; the location codes are not used
        n1, s1, l1 = netsta1.split(".")
        n2, s2, l2 = netsta2.split(".")
        dpair = "%s_%s_%s_%s" % (n1, s1, n2, s2)
        dist = interstations[dpair] if dpair in interstations else 0.0
        logger.info(
            "There are DTT jobs for some days to recompute for %s" % pair)

        for f in filters:
            filterid = int(f.ref)
            for components in params.all_components:
                for mov_stack in mov_stacks:
                    # os.path.join instead of hard-coded backslashes: with
                    # "MWCS2\..\file" the directory part is empty on POSIX
                    # and os.makedirs('') fails.
                    subdirs = ("%02i" % filterid, "%03i_DAYS" % mov_stack,
                               components)
                    fname = "%s_%s.h5" % (netsta1, netsta2)
                    fn = os.path.join("MWCS2", *subdirs, fname)
                    out = os.path.join("DTT2", *subdirs, fname)
                    print("Reading %s" % fn)
                    mwcs = pd.read_hdf(fn)
                    print(mwcs.head())
                    M = mwcs.xs("M", level=1, axis=1).copy()
                    EM = mwcs.xs("EM", level=1, axis=1).copy()
                    MCOH = mwcs.xs("MCOH", level=1, axis=1).copy()
                    tArray = M.columns.values

                    # inner (m) and outer (M) limits of the left/right windows
                    if params.dtt_lag == "static":
                        lmlag = -params.dtt_minlag
                        rmlag = params.dtt_minlag
                    else:
                        lmlag = -dist / params.dtt_v
                        rmlag = dist / params.dtt_v
                    lMlag = lmlag - params.dtt_width
                    rMlag = rmlag + params.dtt_width

                    if params.dtt_sides == "both":
                        tindex = np.where(
                            ((tArray >= lMlag) & (tArray <= lmlag)) |
                            ((tArray >= rmlag) & (tArray <= rMlag)))[0]
                    elif params.dtt_sides == "left":
                        tindex = np.where(
                            (tArray >= lMlag) & (tArray <= lmlag))[0]
                    else:
                        tindex = np.where(
                            (tArray >= rmlag) & (tArray <= rMlag))[0]

                    # Rejected samples are flagged with the sentinel values
                    # err == 1.0 and coh == 0.0, which are filtered below.
                    tmp = np.setdiff1d(np.arange(len(tArray)), tindex)
                    EM.iloc[:, tmp] = 1.0
                    MCOH.iloc[:, tmp] *= 0.0

                    MCOH[MCOH < params.dtt_mincoh] = 0.0
                    # was "*= 1.0", a no-op that never applied dtt_maxerr
                    EM[EM > params.dtt_maxerr] = 1.0

                    # TODO missing check on max_dt !!

                    values = []
                    for i in range(len(M.index)):
                        errArray = EM.iloc[i]
                        dtArray = M.iloc[i]
                        cohArray = MCOH.iloc[i]

                        index = np.where(
                            (errArray != 1.0) & (cohArray != 0.0))[0]
                        errArray = errArray.iloc[index]
                        dtArray = dtArray.iloc[index]

                        w = 1.0 / errArray
                        w[~np.isfinite(w)] = 1.0

                        VecXfilt = tArray[index]
                        VecYfilt = dtArray
                        if len(VecYfilt) >= 2:
                            m, a, em, ea = linear_regression(
                                VecXfilt, VecYfilt, w,
                                intercept_origin=False)
                            m0, em0 = linear_regression(
                                VecXfilt, VecYfilt, w,
                                intercept_origin=True)
                            values.append([m, em, a, ea, m0, em0])
                        else:
                            # keep one row per index entry, otherwise the
                            # DataFrame below fails on a length mismatch
                            values.append([np.nan] * 6)

                    values = pd.DataFrame(
                        values, index=M.index,
                        columns=["m", "em", "a", "ea", "m0", "em0"])
                    outdir = os.path.split(out)[0]
                    if not os.path.isdir(outdir):
                        os.makedirs(outdir)
                    values.to_hdf(out, key="DTT")
def wxs_allfreq(cur, ref, allfreq, para, dj=1 / 12, s0=-1, J=-1, sig=False,
                wvn='morlet', unwrapflag=False):
    """
    Compute dt or dv/v in time and frequency domain from wavelet cross
    spectrum (wxs), for all frequencies in a range of interest.

    Parameters
    --------------
    :type cur: :class:`~numpy.ndarray`
    :param cur: 1d array. Cross-correlation measurements.
    :type ref: :class:`~numpy.ndarray`
    :param ref: 1d array. The reference trace.
    :param allfreq: if True return one measurement per frequency, otherwise
        return the mean over the selected frequencies.
    :param para: dict with the keys 'twin' (time window limits),
        'freq' (frequency band limits) and 'dt' (sampling interval).
    :params dj, s0, J, sig, wvn: refer to function 'wavelet.wct'
    :param unwrapflag: True - unwrap phase delays. Default is False

    Returns
    --------------
    allfreq False -> (mean(dvv)*100, mean(err)*100)
    allfreq True  -> (freq, dvv*100, err*100), one entry per frequency of
        the band

    Raises
    --------------
    ValueError if the requested band is empty or exceeds the highest
    frequency returned by the wavelet transform.

    Originally written by Tim Clements (1 March, 2019)
    Modified by Congcong Yuan (30 June, 2019) based on (Mao et al. 2019).
    """
    # common variables
    twin = para['twin']
    freq = para['freq']
    dt = para['dt']
    tmin = np.min(twin)
    tmax = np.max(twin)
    fmin = np.min(freq)
    fmax = np.max(freq)
    tvec = np.arange(tmin, tmax, dt)

    # perform cross coherent analysis, modified from function 'wavelet.cwt'
    # (note: 'freq' and 'sig' are rebound to the values returned here)
    WCT, aWCT, coi, freq, sig = pycwt.wct(cur, ref, dt, dj=dj, s0=s0, J=J,
                                          sig=sig, wavelet=wvn,
                                          normalize=True)

    if unwrapflag:
        # axis=0, unwrap along time; axis=-1, unwrap along frequency
        phase = np.unwrap(aWCT, axis=-1)
    else:
        phase = aWCT

    # convert phase delay to time delay: normalize phase by (2*pi*frequency)
    delta_t = phase / (2 * np.pi * freq[:, None])

    # zero out data outside frequency band
    if (fmax > np.max(freq)) | (fmax <= fmin):
        raise ValueError('Abort: input frequency out of limits!')
    freq_indin = np.where((freq >= fmin) & (freq <= fmax))[0]

    # initialize arrays for dv/v measurements
    dvv, err = np.zeros(freq_indin.shape), np.zeros(freq_indin.shape)

    # loop through freq for linear regression
    for ii, ifreq in enumerate(freq_indin):
        if len(tvec) > 2:
            if not np.any(delta_t[ifreq]):
                # leaves the initial 0.0 for this frequency
                continue

            # inverse coherence used as regression weight
            w = 1 / WCT[ifreq]
            w[~np.isfinite(w)] = 1.0

            m, em = linear_regression(tvec, delta_t[ifreq], w,
                                      intercept_origin=True)
            dvv[ii], err[ii] = -m, em
        else:
            print('not enough points to estimate dv/v')
            dvv[ii], err[ii] = np.nan, np.nan

    # No explicit 'del' of the work variables: 'w', 'm' and 'em' are unbound
    # whenever no regression was run, which made the original cleanup raise
    # UnboundLocalError. Locals are released on return anyway.
    if not allfreq:
        return np.mean(dvv) * 100, np.mean(err) * 100
    return freq[freq_indin], dvv * 100, err * 100
def WCC_dvv(ref, cur, moving_window_length, slide_step, para):
    """
    Windowed cross correlation (WCC) for dt or dv/v measurement
    (Snieder et al. 2012)

    Parameters:
    -----------
    ref: The "Reference" timeseries
    cur: The "Current" timeseries
    moving_window_length: The moving window length (in seconds)
    slide_step: The step to jump for the moving window (in seconds)
    para: a dict containing freq/time info of the data matrix.
        Keys read here: 'twin' and 'dt'

    Returns:
    ------------
    -m0 * 100: dv/v in %, from a regression through the origin of the
        per-window time shifts against the window central times
    em0 * 100: error of that estimate in %
    Both are 0 when fewer than 3 windows are available.

    Raises:
    ------------
    ValueError if slide_step is shorter than one sample (the window would
    never advance).

    Written by Congcong Yuan (1 July, 2019)
    """
    # common variables
    twin = para['twin']
    dt = para['dt']
    tmin = np.min(twin)

    # info on the moving window
    # builtin int(): the np.int alias was removed in NumPy 1.24
    window_length_samples = int(moving_window_length / dt)
    step_samples = int(slide_step / dt)
    if step_samples < 1:
        # a zero step made the loop below run forever
        raise ValueError('slide_step must be at least one sample (dt) long')

    # parameter initialize
    delta_t = []
    time_axis = []
    count = 0
    tp = cosine_taper(window_length_samples, 0.15)

    minind = 0
    maxind = window_length_samples

    # loop through all sub-windows
    while maxind <= len(ref):
        cci = cur[minind:maxind]
        cci = scipy.signal.detrend(cci, type='linear')
        cci *= tp

        cri = ref[minind:maxind]
        cri = scipy.signal.detrend(cri, type='linear')
        cri *= tp

        minind += step_samples
        maxind += step_samples

        # normalize signals before cross correlation
        cci = (cci - cci.mean()) / cci.std()
        cri = (cri - cri.mean()) / cri.std()

        # get the index of the maximum correlation coefficient
        cc2 = np.correlate(cci, cri, mode='same')
        cc2 = cc2 / np.sqrt((cci**2).sum() * (cri**2).sum())
        # argmax yields exactly one index even when the maximum is tied, so
        # every window contributes a single shift
        imaxcc2 = np.argmax(cc2)

        # time shift relative to the window centre
        # (maxind - minind is the constant window length)
        m = (imaxcc2 - ((maxind - minind) // 2)) * dt
        delta_t.append(m)
        time_axis.append(tmin + moving_window_length / 2. +
                         count * slide_step)
        count += 1

    if maxind > len(cur) + step_samples:
        print("The last window was too small, but was computed")

    delta_t = np.array(delta_t)
    time_axis = np.array(time_axis)

    # linear regression to get dv/v
    if count > 2:
        # simple (uniform) weight
        w = np.ones(count)
        m0, em0 = linear_regression(time_axis.flatten(), delta_t.flatten(),
                                    w.flatten(), intercept_origin=True)
    else:
        print('not enough points to estimate dv/v')
        m0 = 0
        em0 = 0

    return -m0 * 100, em0 * 100
def mwcs_dvv(ref, cur, moving_window_length, slide_step, para,
             smoothing_half_win=5):
    """
    Moving Window Cross Spectrum method to measure dv/v (relying on
    phi=2*pi*f*t in freq domain)

    PARAMETERS:
    ----------------
    ref: Reference waveform (np.ndarray, size N)
    cur: Current waveform (np.ndarray, size N)
    moving_window_length: moving window length to calculate cross-spectrum
        (np.float, in sec)
    slide_step: steps in time to shift the moving window (np.float, in seconds)
    para: a dict containing parameters about input data window and frequency
        info. Keys read here:
            'twin' -> limits of the target time window
            'freq' -> frequency bounds used to compute the dephasing (in Hz)
            'dt'   -> sampling interval of the input timeseries
    smoothing_half_win: If different from 0, defines the half length of
        the smoothing hanning window.

    RETURNS:
    ------------------
    -m0 * 100: dv/v in %, from a weighted regression through the origin of
        the accepted per-window delays against the window central times
    em0 * 100: error of that estimate in %
    Both are 0 when fewer than 3 windows pass the quality thresholds.

    RAISES:
    ------------------
    ValueError if slide_step is shorter than one sample (the window would
    never advance).

    Originally from MSNoise by Thomas Lecocq.
    (https://github.com/ROBelgium/MSNoise/tree/master/msnoise)
    Modified by Chengxin Jiang
    """
    # common variables
    twin = para['twin']
    freq = para['freq']
    dt = para['dt']
    tmin = np.min(twin)
    fmin = np.min(freq)
    fmax = np.max(freq)

    # info on the moving window
    # builtin int(): the np.int alias was removed in NumPy 1.24
    window_length_samples = int(moving_window_length / dt)
    step_samples = int(slide_step / dt)
    if step_samples < 1:
        # a zero step made the loop below run forever
        raise ValueError('slide_step must be at least one sample (dt) long')

    # parameter initialize
    delta_t = []
    delta_err = []
    delta_mcoh = []
    time_axis = []

    # FFT length derived from the next power of two of the window length
    padd = int(2**(monitor_modules.nextpow2(window_length_samples) + 2))
    count = 0
    tp = cosine_taper(window_length_samples, 0.15)

    minind = 0
    maxind = window_length_samples

    # loop through all sub-windows
    while maxind <= len(ref):
        cci = cur[minind:maxind]
        cci = scipy.signal.detrend(cci, type='linear')
        cci *= tp

        cri = ref[minind:maxind]
        cri = scipy.signal.detrend(cri, type='linear')
        cri *= tp

        minind += step_samples
        maxind += step_samples

        # do fft (keep the first half of the spectrum only)
        fcur = scipy.fftpack.fft(cci, n=padd)[:padd // 2]
        fref = scipy.fftpack.fft(cri, n=padd)[:padd // 2]

        fcur2 = np.real(fcur)**2 + np.imag(fcur)**2
        fref2 = np.real(fref)**2 + np.imag(fref)**2

        # get cross-spectrum & do filtering
        X = fref * (fcur.conj())
        if smoothing_half_win != 0:
            dcur = np.sqrt(monitor_modules.smooth(
                fcur2, window='hanning', half_win=smoothing_half_win))
            dref = np.sqrt(monitor_modules.smooth(
                fref2, window='hanning', half_win=smoothing_half_win))
            X = monitor_modules.smooth(X, window='hanning',
                                       half_win=smoothing_half_win)
        else:
            dcur = np.sqrt(fcur2)
            dref = np.sqrt(fref2)

        dcs = np.abs(X)

        # Find the values the frequency range of interest
        freq_vec = scipy.fftpack.fftfreq(len(X) * 2, dt)[:padd // 2]
        index_range = np.argwhere(
            np.logical_and(freq_vec >= fmin, freq_vec <= fmax))

        # Get Coherence and its mean value
        coh = monitor_modules.getCoherence(dcs, dref, dcur)
        mcoh = np.mean(coh[index_range])

        # Get Weights; coherence >= 0.99 is capped (0.9801 == 0.99**2) so the
        # weight stays finite
        w = 1.0 / (1.0 / (coh[index_range]**2) - 1.0)
        w[coh[index_range] >= 0.99] = 1.0 / (1.0 / 0.9801 - 1.0)
        w = np.sqrt(w * np.sqrt(dcs[index_range]))
        w = np.real(w)

        # Frequency array:
        v = np.real(freq_vec[index_range]) * 2 * np.pi

        # Phase:
        phi = np.angle(X)
        phi[0] = 0.
        phi = np.unwrap(phi)
        phi = phi[index_range]

        # Calculate the slope with a weighted least square linear regression
        # forced through the origin; weights for the WLS must be the variance !
        m, em = linear_regression(v.flatten(), phi.flatten(), w.flatten())
        delta_t.append(m)

        # error of the slope, computed from the residuals and the weights
        e = np.sum((phi - m * v)**2) / (np.size(v) - 1)
        s2x2 = np.sum(v**2 * w**2)
        sx2 = np.sum(w * v**2)
        e = np.sqrt(e * s2x2 / sx2**2)

        delta_err.append(e)
        delta_mcoh.append(np.real(mcoh))
        time_axis.append(tmin + moving_window_length / 2. +
                         count * slide_step)
        count += 1

    if maxind > len(cur) + step_samples:
        print("The last window was too small, but was computed")

    # ensure all matrix are np array
    delta_t = np.array(delta_t)
    delta_err = np.array(delta_err)
    delta_mcoh = np.array(delta_mcoh)
    time_axis = np.array(time_axis)

    # ready for linear regression
    delta_mincho = 0.65
    delta_maxerr = 0.1
    delta_maxdt = 0.1

    # -----find good dt measurements-----
    # The delay threshold is applied to |dt|: the original "delta_t < maxdt"
    # accepted arbitrarily large negative delays.
    good = ((delta_mcoh > delta_mincho) &
            (delta_err < delta_maxerr) &
            (np.abs(delta_t) < delta_maxdt))
    indx = np.where(good)[0]

    if len(indx) > 2:
        # ----estimate weight for regression----
        w = 1 / delta_err[indx]
        w[~np.isfinite(w)] = 1.0

        # ---------do linear regression-----------
        m0, em0 = linear_regression(time_axis[indx], delta_t[indx], w,
                                    intercept_origin=True)
    else:
        print('not enough points to estimate dv/v')
        m0 = 0
        em0 = 0

    return -m0 * 100, em0 * 100
def dtw_dvv(ref, cur, para, maxLag, b, direction):
    """
    Estimate dv/v with dynamic time warping (DTW).

    PARAMETERS:
    ----------------
    ref : reference signal (np.array, size N)
    cur : current signal (np.array, size N)
    para: dict describing the data window; the keys 'twin' and 'dt' are read
    maxLag : max number of points to search forward and backward.
        Suggest setting it larger if window is set larger.
    b : b-value to limit strain, which is to limit the maximum velocity
        perturbation. See equation 11 in (Mikesell et al. 2015)
    direction : direction to accumulate errors (1=forward, -1=backward)

    RETURNS:
    ------------------
    m0 * 100 : regression slope of the warping shifts, in percent
    em0 * 100 : error of that slope, in percent
    dist : accumulated error (distance) function
    """
    window = para['twin']
    step = para['dt']
    lag_times = np.arange(np.min(window), np.max(window), step)

    n_samples = len(ref)  # number of time samples

    # The error function over lags does not depend on the strain limit 'b'.
    err = monitor_modules.computeErrorFunction(cur, ref, n_samples, maxLag)

    # Accumulate the errors along 'direction' (1=forward, -1=backward), then
    # backtrack the other way. Flipping the sign, or changing 'b', shows how
    # the warping path depends on the side where backtracking starts.
    dist = monitor_modules.accumulateErrorFunction(direction, err, n_samples,
                                                   maxLag, b)
    path = monitor_modules.backtrackDistanceFunction(-1 * direction, dist,
                                                     err, -maxLag, b)
    shift_time = path * step  # samples -> time

    if n_samples <= 2:
        print('not enough points to estimate dv/v for dtw')
        return 0, 0, dist

    # regression through the origin with uniform weights
    uniform = np.ones(n_samples)
    slope, slope_err = linear_regression(lag_times.flatten(),
                                         shift_time.flatten(),
                                         uniform.flatten(),
                                         intercept_origin=True)
    return slope * 100, slope_err * 100, dist
def main(interval=1):
    """Compute dt/t from the daily MWCS text files.

    For every filter, component, moving stack and updated day, the MWCS file
    of each station pair is read and the samples outside the configured lag
    window are masked. When more than one pair is available an "ALL" row is
    stacked from the samples passing the thresholds. Each row is then fitted
    with two weighted linear regressions (free intercept and through the
    origin) and the results are written to one CSV file per day under "DTT".

    :param interval: number of days passed to ``updated_days_for_dates``.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s [%(levelname)s] %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    logging.info('*** Starting: Compute DT/T ***')
    db = connect()

    dtt_lag = get_config(db, "dtt_lag")
    dtt_v = float(get_config(db, "dtt_v"))
    dtt_minlag = float(get_config(db, "dtt_minlag"))
    dtt_width = float(get_config(db, "dtt_width"))
    dtt_sides = get_config(db, "dtt_sides")
    minCoh = float(get_config(db, "dtt_mincoh"))
    maxErr = float(get_config(db, "dtt_maxerr"))
    maxDt = float(get_config(db, "dtt_maxdt"))

    start, end, datelist = build_movstack_datelist(db)

    mov_stack = get_config(db, "mov_stack")
    if mov_stack.count(',') == 0:
        mov_stacks = [int(mov_stack), ]
    else:
        mov_stacks = [int(mi) for mi in mov_stack.split(',')]

    components_to_compute = get_components_to_compute(db)
    updated_dtt = updated_days_for_dates(
        db, start, end, '%', jobtype='DTT', returndays=True,
        interval=datetime.timedelta(days=interval))

    def _select(dt_vals, err_vals, coh_vals):
        # indices of the samples passing the coh / err / |dt| thresholds
        keep = ((coh_vals >= minCoh) & (err_vals <= maxErr) &
                (np.abs(dt_vals) <= maxDt))
        return np.where(keep)[0]

    def _stack(dt_rows, err_rows, coh_rows, nlags, rows=None):
        # one stacked (dt, err, coh) row built with wavg_wstd, lag by lag,
        # optionally restricted to the pair rows listed in 'rows'
        new_dt = np.zeros(nlags)
        new_err = np.zeros(nlags) + 9999
        new_coh = np.zeros(nlags)
        for i in range(nlags):
            index = _select(dt_rows[:, i], err_rows[:, i], coh_rows[:, i])
            if rows is not None:
                index = np.intersect1d(index, rows)
            wavg, wstd = wavg_wstd(dt_rows[:, i][index],
                                   err_rows[:, i][index])
            new_dt[i] = wavg
            new_err[i] = wstd
            new_coh[i] = 1.0
        return new_dt, new_err, new_coh

    for f in get_filters(db, all=False):
        filterid = int(f.ref)
        for components in components_to_compute:
            for mov_stack in mov_stacks:
                logging.info('Loading mov=%i days for filter=%02i' %
                             (mov_stack, filterid))
                for current in updated_dtt:
                    if current > datetime.date.today():
                        break
                    logging.debug("Processing %s - %02i - %02i mov" %
                                  (current, filterid, mov_stack))
                    first = True
                    for station1, station2 in get_station_pairs(db,
                                                                used=True):
                        sta1 = "%s_%s" % (station1.net, station1.sta)
                        sta2 = "%s_%s" % (station2.net, station2.sta)
                        pair = "%s_%s" % (sta1, sta2)
                        day = os.path.join('MWCS', "%02i" % filterid,
                                           "%03i_DAYS" % mov_stack,
                                           components, pair,
                                           '%s.txt' % current)
                        dist = get_interstation_distance(
                            station1, station2, station1.coordinates)
                        if dist == 0. and dtt_lag == "dynamic":
                            logging.debug('%s: Distance is Zero?!' % pair)
                        if not os.path.isfile(day):
                            continue

                        df = pd.read_csv(day, delimiter=' ', header=None,
                                         index_col=0,
                                         names=['t', 'dt', 'err', 'coh'])
                        tArray = df.index.values

                        # inner (m) and outer (M) limits of the lag windows
                        if dtt_lag == "static":
                            lmlag = -dtt_minlag
                            rmlag = dtt_minlag
                        else:
                            lmlag = -dist / dtt_v
                            rmlag = dist / dtt_v
                        lMlag = lmlag - dtt_width
                        rMlag = rmlag + dtt_width

                        if dtt_sides == "both":
                            tindex = np.where(
                                ((tArray >= lMlag) & (tArray <= lmlag)) |
                                ((tArray >= rmlag) & (tArray <= rMlag)))[0]
                        elif dtt_sides == "left":
                            tindex = np.where(
                                (tArray >= lMlag) & (tArray <= lmlag))[0]
                        else:
                            tindex = np.where(
                                (tArray >= rmlag) & (tArray <= rMlag))[0]

                        # Mask the samples outside the window. 'tmp' holds row
                        # positions, so write through iloc: the chained form
                        # df['err'][tmp] = ... may act on a copy and mixes
                        # positions with the float 't' index labels.
                        tmp = np.setdiff1d(np.arange(len(tArray)), tindex)
                        df.iloc[tmp, df.columns.get_loc('err')] = 1.0
                        df.iloc[tmp, df.columns.get_loc('coh')] = 0.0

                        if first:
                            dtArray = df['dt']
                            errArray = df['err']
                            cohArray = df['coh']
                            pairArray = [pair, ]
                            first = False
                        else:
                            dtArray = np.vstack((dtArray, df['dt']))
                            errArray = np.vstack((errArray, df['err']))
                            cohArray = np.vstack((cohArray, df['coh']))
                            pairArray.append(pair)

                    if first:
                        # no MWCS file found for this day
                        continue

                    Dates = []
                    Pairs = []
                    M = []
                    EM = []
                    A = []
                    EA = []
                    M0 = []
                    EM0 = []
                    if len(pairArray) != 1:
                        # first stack all pairs to a ALL mean pair, using
                        # indexes of selected values:
                        new_dt, new_err, new_coh = _stack(
                            dtArray, errArray, cohArray, len(tArray))
                        dtArray = np.vstack((dtArray, new_dt))
                        errArray = np.vstack((errArray, new_err))
                        cohArray = np.vstack((cohArray, new_coh))
                        pairArray.append("ALL")

                        # then stack selected pairs to GROUPS
                        # (no group is defined, so this loop never runs)
                        groups = {}
                        npairs = len(pairArray) - 1
                        for group in groups:
                            pairindex = []
                            for j, gpair in enumerate(pairArray[:npairs]):
                                net1, gsta1, net2, gsta2 = gpair.split('_')
                                if gsta1 in groups[group] and \
                                        gsta2 in groups[group]:
                                    pairindex.append(j)
                            pairindex = np.array(pairindex)
                            new_dt, new_err, new_coh = _stack(
                                dtArray, errArray, cohArray, len(tArray),
                                rows=pairindex)
                            dtArray = np.vstack((dtArray, new_dt))
                            errArray = np.vstack((errArray, new_err))
                            cohArray = np.vstack((cohArray, new_coh))
                            pairArray.append(group)
                        # END OF GROUP HANDLING

                    # then process all pairs + the ALL
                    if len(dtArray.shape) == 1:
                        # only one pair: the data are still pandas Series.
                        # Series.reshape no longer exists, so convert first.
                        dtArray = np.asarray(dtArray).reshape((1, -1))
                        cohArray = np.asarray(cohArray).reshape((1, -1))
                        errArray = np.asarray(errArray).reshape((1, -1))

                    for i, pair in enumerate(pairArray):
                        index = _select(dtArray[i], errArray[i], cohArray[i])

                        w = 1.0 / errArray[i][index]
                        w[~np.isfinite(w)] = 1.0

                        VecXfilt = tArray[index]
                        VecYfilt = dtArray[i][index]
                        if len(VecYfilt) >= 2:
                            m, a, em, ea = linear_regression(
                                VecXfilt, VecYfilt, w,
                                intercept_origin=False)
                            m0, em0 = linear_regression(
                                VecXfilt, VecYfilt, w,
                                intercept_origin=True)
                            M.append(m)
                            EM.append(em)
                            A.append(a)
                            EA.append(ea)
                            M0.append(m0)
                            EM0.append(em0)
                            Dates.append(current)
                            Pairs.append(pair)

                    logging.debug("%s: exporting: %i pairs" %
                                  (current, len(pairArray)))
                    df = pd.DataFrame(
                        {'Pairs': Pairs, 'M': M, 'EM': EM, 'A': A,
                         'EA': EA, 'M0': M0, 'EM0': EM0},
                        index=pd.DatetimeIndex(Dates))
                    # Needs to be changed !
                    output = os.path.join('DTT', "%02i" % filterid,
                                          "%03i_DAYS" % mov_stack,
                                          components)
                    if not os.path.isdir(output):
                        os.makedirs(output)
                    df.to_csv(os.path.join(output, '%s.txt' % current),
                              index_label='Date')

    logging.info('*** Finished: Compute DT/T ***')
def main(interval=1, loglevel="INFO"):
    """Compute dt/t from the daily MWCS text files, one job batch at a time.

    For every pending 'DTT' job batch, every filter, component and moving
    stack, the MWCS file of each job's station pair is read and the samples
    outside the configured lag window are masked. When more than one pair is
    available an "ALL" row is stacked from the samples passing the
    thresholds. Each row is then fitted with two weighted linear regressions
    (free intercept and through the origin), the results are written to a
    CSV file under "DTT", and the batch is flagged "D".

    :param interval: number of days passed to ``updated_days_for_dates``.
    :param loglevel: log level given to ``get_logger``.
    """
    # logger configured to show the pid number in log records
    logger = get_logger('msnoise.compute_dtt_child', loglevel, with_pid=True)
    logger.info('*** Starting: Compute DT/T ***')
    db = connect()
    params = get_params(db)
    start, end, datelist = build_movstack_datelist(db)

    mov_stack = get_config(db, "mov_stack")
    if mov_stack.count(',') == 0:
        mov_stacks = [int(mov_stack), ]
    else:
        mov_stacks = [int(mi) for mi in mov_stack.split(',')]

    # NOTE(review): the two results below are not used in this function; the
    # calls are kept because their side effects cannot be checked from here.
    components_to_compute = get_components_to_compute(db)
    updated_dtt = updated_days_for_dates(
        db, start, end, '%', jobtype='DTT', returndays=True,
        interval=datetime.timedelta(days=interval))

    # interstation distance lookup, keyed "NET_STA_NET_STA"
    interstations = {}
    for sta1, sta2 in get_station_pairs(db):
        s1 = "%s_%s" % (sta1.net, sta1.sta)
        s2 = "%s_%s" % (sta2.net, sta2.sta)
        if s1 == s2:
            interstations["%s_%s" % (s1, s2)] = 0.0
        else:
            interstations["%s_%s" % (s1, s2)] = get_interstation_distance(
                sta1, sta2, sta1.coordinates)

    def _select(dt_vals, err_vals, coh_vals):
        # indices of the samples passing the coh / err / |dt| thresholds
        keep = ((coh_vals >= params.dtt_mincoh) &
                (err_vals <= params.dtt_maxerr) &
                (np.abs(dt_vals) <= params.dtt_maxdt))
        return np.where(keep)[0]

    def _stack(dt_rows, err_rows, coh_rows, nlags, rows=None):
        # one stacked (dt, err, coh) row built with wavg_wstd, lag by lag,
        # optionally restricted to the pair rows listed in 'rows'
        new_dt = np.zeros(nlags)
        new_err = np.zeros(nlags) + 9999
        new_coh = np.zeros(nlags)
        for i in range(nlags):
            index = _select(dt_rows[:, i], err_rows[:, i], coh_rows[:, i])
            if rows is not None:
                index = np.intersect1d(index, rows)
            wavg, wstd = wavg_wstd(dt_rows[:, i][index],
                                   err_rows[:, i][index])
            new_dt[i] = wavg
            new_err[i] = wstd
            new_coh[i] = 1.0
        return new_dt, new_err, new_coh

    filters = get_filters(db, all=False)
    while is_next_job(db, jobtype='DTT'):
        jobs = get_next_job(db, jobtype='DTT')

        for f in filters:
            filterid = int(f.ref)
            for components in params.all_components:
                for mov_stack in mov_stacks:
                    logger.info('Loading mov=%i days for filter=%02i' %
                                (mov_stack, filterid))
                    first = True
                    for job in jobs:
                        netsta1, netsta2 = job.pair.split(':')
                        # 'current' keeps the day of the last job; it names
                        # the output file and dates the exported rows
                        current = job.day
                        sta1 = netsta1.replace(".", "_")
                        sta2 = netsta2.replace(".", "_")
                        pair = "%s_%s" % (sta1, sta2)
                        day = os.path.join('MWCS', "%02i" % filterid,
                                           "%03i_DAYS" % mov_stack,
                                           components, pair,
                                           '%s.txt' % current)
                        dist = interstations[pair]
                        if dist == 0. and params.dtt_lag == "dynamic":
                            logger.debug('%s: Distance is Zero?!' % pair)
                        if not os.path.isfile(day):
                            continue

                        df = pd.read_csv(day, delimiter=' ', header=None,
                                         index_col=0,
                                         names=['t', 'dt', 'err', 'coh'])
                        tArray = df.index.values

                        # inner (m) and outer (M) limits of the lag windows
                        if params.dtt_lag == "static":
                            lmlag = -params.dtt_minlag
                            rmlag = params.dtt_minlag
                        else:
                            lmlag = -dist / params.dtt_v
                            rmlag = dist / params.dtt_v
                        lMlag = lmlag - params.dtt_width
                        rMlag = rmlag + params.dtt_width

                        if params.dtt_sides == "both":
                            tindex = np.where(
                                ((tArray >= lMlag) & (tArray <= lmlag)) |
                                ((tArray >= rmlag) & (tArray <= rMlag)))[0]
                        elif params.dtt_sides == "left":
                            tindex = np.where(
                                (tArray >= lMlag) & (tArray <= lmlag))[0]
                        else:
                            tindex = np.where(
                                (tArray >= rmlag) & (tArray <= rMlag))[0]

                        # Mask the samples outside the window. 'tmp' holds row
                        # positions, so write through iloc: the chained form
                        # df['err'][tmp] = ... may act on a copy and mixes
                        # positions with the float 't' index labels.
                        tmp = np.setdiff1d(np.arange(len(tArray)), tindex)
                        df.iloc[tmp, df.columns.get_loc('err')] = 1.0
                        df.iloc[tmp, df.columns.get_loc('coh')] = 0.0

                        if first:
                            dtArray = df['dt']
                            errArray = df['err']
                            cohArray = df['coh']
                            pairArray = [pair, ]
                            first = False
                        else:
                            dtArray = np.vstack((dtArray, df['dt']))
                            errArray = np.vstack((errArray, df['err']))
                            cohArray = np.vstack((cohArray, df['coh']))
                            pairArray.append(pair)

                    if first:
                        # no MWCS file found for this batch
                        continue

                    Dates = []
                    Pairs = []
                    M = []
                    EM = []
                    A = []
                    EA = []
                    M0 = []
                    EM0 = []
                    if len(pairArray) != 1:
                        # first stack all pairs to a ALL mean pair, using
                        # indexes of selected values:
                        new_dt, new_err, new_coh = _stack(
                            dtArray, errArray, cohArray, len(tArray))
                        dtArray = np.vstack((dtArray, new_dt))
                        errArray = np.vstack((errArray, new_err))
                        cohArray = np.vstack((cohArray, new_coh))
                        pairArray.append("ALL")

                        # then stack selected pairs to GROUPS
                        # (no group is defined, so this loop never runs)
                        groups = {}
                        npairs = len(pairArray) - 1
                        for group in groups:
                            pairindex = []
                            for j, gpair in enumerate(pairArray[:npairs]):
                                net1, gsta1, net2, gsta2 = gpair.split('_')
                                if gsta1 in groups[group] and \
                                        gsta2 in groups[group]:
                                    pairindex.append(j)
                            pairindex = np.array(pairindex)
                            new_dt, new_err, new_coh = _stack(
                                dtArray, errArray, cohArray, len(tArray),
                                rows=pairindex)
                            dtArray = np.vstack((dtArray, new_dt))
                            errArray = np.vstack((errArray, new_err))
                            cohArray = np.vstack((cohArray, new_coh))
                            pairArray.append(group)
                        # END OF GROUP HANDLING

                    # then process all pairs + the ALL
                    if len(dtArray.shape) == 1:
                        # only one pair: the data are still pandas Series
                        dtArray = dtArray.values.reshape(
                            (1, dtArray.shape[0]))
                        cohArray = cohArray.values.reshape(
                            (1, cohArray.shape[0]))
                        errArray = errArray.values.reshape(
                            (1, errArray.shape[0]))

                    for i, pair in enumerate(pairArray):
                        index = _select(dtArray[i], errArray[i], cohArray[i])

                        w = 1.0 / errArray[i][index]
                        w[~np.isfinite(w)] = 1.0

                        VecXfilt = tArray[index]
                        VecYfilt = dtArray[i][index]
                        if len(VecYfilt) >= 2:
                            m, a, em, ea = linear_regression(
                                VecXfilt, VecYfilt, w,
                                intercept_origin=False)
                            m0, em0 = linear_regression(
                                VecXfilt, VecYfilt, w,
                                intercept_origin=True)
                            M.append(m)
                            EM.append(em)
                            A.append(a)
                            EA.append(ea)
                            M0.append(m0)
                            EM0.append(em0)
                            Dates.append(current)
                            Pairs.append(pair)

                    logger.debug("%s: exporting: %i pairs" %
                                 (current, len(pairArray)))
                    df = pd.DataFrame(
                        {'Pairs': Pairs, 'M': M, 'EM': EM, 'A': A,
                         'EA': EA, 'M0': M0, 'EM0': EM0},
                        index=pd.DatetimeIndex(Dates))
                    # Needs to be changed !
                    output = os.path.join('DTT', "%02i" % filterid,
                                          "%03i_DAYS" % mov_stack,
                                          components)
                    if not os.path.isdir(output):
                        os.makedirs(output)
                    df.to_csv(os.path.join(output, '%s.txt' % current),
                              index_label='Date')

        # THIS SHOULD BE IN THE API
        massive_update_job(db, jobs, "D")

    logger.info('*** Finished: Compute DT/T ***')
def _outside_lag_window(tArray, lmlag, rmlag, dtt_width, dtt_sides):
    """Return the positions of ``tArray`` lying outside the lag window(s).

    The left window spans ``[lmlag - dtt_width, lmlag]`` and the right window
    spans ``[rmlag, rmlag + dtt_width]``. ``dtt_sides`` selects ``"both"``,
    ``"left"`` or (any other value) the right window only.

    :param tArray: 1-D numpy array of lag times.
    :param lmlag: inner edge of the left window.
    :param rmlag: inner edge of the right window.
    :param dtt_width: width added outward from each inner edge.
    :param dtt_sides: which side(s) to keep.
    :returns: sorted integer positions (not labels) outside the selection.
    """
    lMlag = lmlag - dtt_width
    rMlag = rmlag + dtt_width
    in_left = (tArray >= lMlag) & (tArray <= lmlag)
    in_right = (tArray >= rmlag) & (tArray <= rMlag)
    if dtt_sides == "both":
        selected = in_left | in_right
    elif dtt_sides == "left":
        selected = in_left
    else:
        selected = in_right
    return np.where(~selected)[0]


def _stack_pairs(dtArray, errArray, cohArray, minCoh, maxErr, maxDt,
                 pairindex=None):
    """Combine the rows (pairs) of the 2-D arrays into one synthetic row.

    For every column (lag sample), the rows passing the coherence, error and
    dt thresholds are fed to ``wavg_wstd``; its two return values become the
    stacked dt and error for that sample. The stacked coherence is set to 1.0.

    :param dtArray: 2-D array of dt values, one row per pair.
    :param errArray: 2-D array of errors, same shape as ``dtArray``.
    :param cohArray: 2-D array of coherences, same shape as ``dtArray``.
    :param minCoh: minimum coherence for a value to be used.
    :param maxErr: maximum error for a value to be used.
    :param maxDt: maximum absolute dt for a value to be used.
    :param pairindex: optional integer array of row positions; when given,
        only those rows can contribute.
    :returns: tuple ``(new_dtArray, new_errArray, new_cohArray)`` of 1-D
        arrays, one element per column.
    """
    n_lags = dtArray.shape[1]
    new_dtArray = np.zeros(n_lags)
    new_errArray = np.zeros(n_lags) + 9999
    new_cohArray = np.zeros(n_lags)
    for i in range(n_lags):
        keep = ((cohArray[:, i] >= minCoh) &
                (errArray[:, i] <= maxErr) &
                (np.abs(dtArray[:, i]) <= maxDt))
        index = np.where(keep)[0]
        if pairindex is not None:
            index = np.intersect1d(index, pairindex)
        wavg, wstd = wavg_wstd(dtArray[:, i][index], errArray[:, i][index])
        new_dtArray[i] = wavg
        new_errArray[i] = wstd
        new_cohArray[i] = 1.0
    return new_dtArray, new_errArray, new_cohArray


def main(interval=1):
    """Compute dt/t regressions from the MWCS results and export them.

    For every filter, component and moving-stack length, and for every day
    returned by ``updated_days_for_dates``, the per-pair files
    ``MWCS/<filter>/<mov>_DAYS/<components>/<pair>/<day>.txt`` are read.
    Samples outside the configured lag window get ``err = 1.0`` and
    ``coh = 0.0``. When more than one pair has data, a synthetic ``"ALL"``
    pair is appended by stacking all pairs with ``wavg_wstd``. For each pair
    with at least two samples passing the ``dtt_mincoh`` / ``dtt_maxerr`` /
    ``dtt_maxdt`` thresholds, two weighted linear regressions (weights
    ``1 / err``) are run: one with a free intercept and one forced through
    the origin. Results are written to
    ``DTT/<filter>/<mov>_DAYS/<components>/<day>.txt``.

    :param interval: number of days, passed to ``updated_days_for_dates`` as
        a ``datetime.timedelta`` (presumably the look-back period for
        modified jobs -- confirm against that helper).
    """
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s [%(levelname)s] %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')

    logging.info('*** Starting: Compute DT/T ***')

    db = connect()

    dtt_lag = get_config(db, "dtt_lag")
    dtt_v = float(get_config(db, "dtt_v"))
    dtt_minlag = float(get_config(db, "dtt_minlag"))
    dtt_width = float(get_config(db, "dtt_width"))
    dtt_sides = get_config(db, "dtt_sides")
    minCoh = float(get_config(db, "dtt_mincoh"))
    maxErr = float(get_config(db, "dtt_maxerr"))
    maxDt = float(get_config(db, "dtt_maxdt"))

    start, end, datelist = build_movstack_datelist(db)

    # "mov_stack" is either a single integer or a comma-separated list.
    mov_stacks = [int(mi) for mi in get_config(db, "mov_stack").split(',')]

    components_to_compute = get_components_to_compute(db)
    updated_dtt = updated_days_for_dates(
        db, start, end, '%', jobtype='DTT', returndays=True,
        interval=datetime.timedelta(days=interval))

    for f in get_filters(db, all=False):
        filterid = int(f.ref)
        for components in components_to_compute:
            for mov_stack in mov_stacks:
                logging.info('Loading mov=%i days for filter=%02i' %
                             (mov_stack, filterid))
                for current in updated_dtt:
                    if current > datetime.date.today():
                        break
                    logging.debug("Processing %s - %02i - %02i mov" %
                                  (current, filterid, mov_stack))
                    first = True
                    for station1, station2 in get_station_pairs(db,
                                                                used=True):
                        sta1 = "%s_%s" % (station1.net, station1.sta)
                        sta2 = "%s_%s" % (station2.net, station2.sta)
                        pair = "%s_%s" % (sta1, sta2)
                        day = os.path.join('MWCS', "%02i" % filterid,
                                           "%03i_DAYS" % mov_stack,
                                           components, pair,
                                           '%s.txt' % current)
                        dist = get_interstation_distance(
                            station1, station2, station1.coordinates)
                        if dist == 0. and dtt_lag == "dynamic":
                            logging.debug('%s: Distance is Zero?!' % pair)
                        if not os.path.isfile(day):
                            continue

                        df = pd.read_csv(
                            day, delimiter=' ', header=None, index_col=0,
                            names=['t', 'dt', 'err', 'coh'])
                        tArray = df.index.values

                        if dtt_lag == "static":
                            lmlag = -dtt_minlag
                            rmlag = dtt_minlag
                        else:
                            lmlag = -dist / dtt_v
                            rmlag = dist / dtt_v

                        tmp = _outside_lag_window(
                            tArray, lmlag, rmlag, dtt_width, dtt_sides)
                        # ``tmp`` holds row positions while ``df`` is indexed
                        # by lag time, so the assignment must be positional.
                        df.iloc[tmp, df.columns.get_loc('err')] = 1.0
                        df.iloc[tmp, df.columns.get_loc('coh')] = 0.0

                        # Keep plain numpy arrays: everything below relies on
                        # positional indexing and ndarray.reshape.
                        if first:
                            dtArray = df['dt'].values
                            errArray = df['err'].values
                            cohArray = df['coh'].values
                            pairArray = [pair, ]
                            first = False
                        else:
                            dtArray = np.vstack((dtArray, df['dt'].values))
                            errArray = np.vstack((errArray, df['err'].values))
                            cohArray = np.vstack((cohArray, df['coh'].values))
                            pairArray.append(pair)
                        del df

                    if first:
                        # No MWCS file was found for this day.
                        continue

                    Dates = []
                    Pairs = []
                    M = []
                    EM = []
                    A = []
                    EA = []
                    M0 = []
                    EM0 = []

                    if len(pairArray) != 1:
                        # first stack all pairs to a ALL mean pair, using
                        # indexes of selected values:
                        new_dtArray, new_errArray, new_cohArray = \
                            _stack_pairs(dtArray, errArray, cohArray,
                                         minCoh, maxErr, maxDt)
                        dtArray = np.vstack((dtArray, new_dtArray))
                        errArray = np.vstack((errArray, new_errArray))
                        cohArray = np.vstack((cohArray, new_cohArray))
                        pairArray.append("ALL")

                        # then stack selected pairs to GROUPS.
                        # ``groups`` is empty here, so the loop below does
                        # not run unless groups are filled in.
                        groups = {}
                        npairs = len(pairArray) - 1
                        for group, members in groups.items():
                            pairindex = []
                            for j, name in enumerate(pairArray[:npairs]):
                                net1, name1, net2, name2 = name.split('_')
                                if name1 in members and name2 in members:
                                    pairindex.append(j)
                            # dtype=int keeps an empty selection usable as
                            # an index array.
                            pairindex = np.array(pairindex, dtype=int)
                            new_dtArray, new_errArray, new_cohArray = \
                                _stack_pairs(dtArray, errArray, cohArray,
                                             minCoh, maxErr, maxDt,
                                             pairindex=pairindex)
                            dtArray = np.vstack((dtArray, new_dtArray))
                            errArray = np.vstack((errArray, new_errArray))
                            cohArray = np.vstack((cohArray, new_cohArray))
                            pairArray.append(group)
                        # END OF GROUP HANDLING

                    # then process all pairs + the ALL
                    if dtArray.ndim == 1:
                        # if there is only one pair:
                        dtArray = dtArray.reshape((1, dtArray.shape[0]))
                        cohArray = cohArray.reshape((1, cohArray.shape[0]))
                        errArray = errArray.reshape((1, errArray.shape[0]))

                    for i, pair in enumerate(pairArray):
                        keep = ((cohArray[i] >= minCoh) &
                                (errArray[i] <= maxErr) &
                                (np.abs(dtArray[i]) <= maxDt))
                        index = np.where(keep)[0]

                        # Weights are 1/err; a zero error gives a non-finite
                        # weight, which is replaced by 1.0.
                        w = 1.0 / errArray[i][index]
                        w[~np.isfinite(w)] = 1.0

                        VecXfilt = tArray[index]
                        VecYfilt = dtArray[i][index]
                        if len(VecYfilt) >= 2:
                            m, a, em, ea = linear_regression(
                                VecXfilt, VecYfilt, w,
                                intercept_origin=False)
                            m0, em0 = linear_regression(
                                VecXfilt, VecYfilt, w,
                                intercept_origin=True)
                            M.append(m)
                            EM.append(em)
                            A.append(a)
                            EA.append(ea)
                            M0.append(m0)
                            EM0.append(em0)
                            Dates.append(current)
                            Pairs.append(pair)

                    logging.debug(
                        "%s: exporting: %i pairs" % (current, len(pairArray)))
                    df = pd.DataFrame(
                        {'Pairs': Pairs, 'M': M, 'EM': EM, 'A': A, 'EA': EA,
                         'M0': M0, 'EM0': EM0},
                        index=pd.DatetimeIndex(Dates))
                    # Needs to be changed !
                    output = os.path.join(
                        'DTT', "%02i" % filterid, "%03i_DAYS" % mov_stack,
                        components)
                    if not os.path.isdir(output):
                        os.makedirs(output)
                    df.to_csv(
                        os.path.join(output, '%s.txt' % current),
                        index_label='Date')
                    # Release the per-day arrays before the next day.
                    del df, tArray, dtArray, errArray, cohArray, pairArray

    logging.info('*** Finished: Compute DT/T ***')