def test_windows():
    """Check `my_centered`, `get_window` and `snratio` on tiny synthetic inputs."""
    # Unit spikes on the centre sample of an odd- and an even-length array.
    # The builtin ``int`` is used because the ``np.int`` alias was deprecated
    # in NumPy 1.20 and removed in NumPy 1.24.
    array1 = np.zeros(7, dtype=int)
    array2 = np.zeros(8, dtype=int)
    array1[3] = 1
    array2[4] = 1

    # Cutting to a shorter odd length keeps the spike on the centre sample ...
    assert my_centered(array1, 5)[2] == 1
    assert my_centered(array2, 5)[2] == 1
    # ... and so does requesting a length longer than the input.
    assert my_centered(array1, 9)[4] == 1
    assert my_centered(array2, 9)[4] == 1

    # Trace with the spike on a constant offset of 1 and a SAC distance header.
    tr = Trace()
    tr.stats.sac = {}
    tr.stats.sac['dist'] = 3.0
    tr.data = my_centered(array1, 15) + 1

    params = {
        'hw': 1,
        'sep_noise': 0,
        'win_overlap': True,
        'wtype': 'hann',
        'causal_side': True,
    }

    win = get_window(tr.stats, g_speed=1.0, params=params)
    assert len(win) == 3
    assert pytest.approx(win[0][10]) == 1.0

    snr = snratio(tr, g_speed=1.0, window_params=params)
    assert int(snr) == 1
def log_en_ratio_adj(corr_o, corr_s, g_speed, window_params):
    """
    Build the adjoint source for the logarithmic energy ratio measurement.

    The window returned by ``wn.get_window`` is applied to the synthetic
    correlation as given (named the "c" branch below) and time-reversed
    (the "a" branch). Each branch is windowed a second time and normalised
    by its energy, and the difference of the two is scaled by 2.

    Parameters
    ----------
    corr_o : trace-like
        Observed correlation; only ``stats`` (including ``npts``) is read.
    corr_s : trace-like
        Synthetic correlation; ``data`` and ``stats.delta`` are read.
        The object is not modified.
    g_speed, window_params :
        Passed through unchanged to ``wn.get_window``.

    Returns
    -------
    adjt_src : numpy.ndarray
        The adjoint source, or an all-NaN array shaped like the window when
        ``window[2]`` is falsy.
    success : bool
        True only when the adjoint source was actually computed.
    """
    success = False
    window = wn.get_window(corr_o.stats, g_speed, window_params)
    win = window[0]

    # The centred copy has to be kept: the previous code called
    # ``my_centered`` and threw the result away, so the synthetic data was
    # never brought to the length of the observed trace.
    data = wn.my_centered(corr_s.data, corr_o.stats.npts)

    # NOTE(review): window[2] presumably flags a usable window -- confirm
    # against wn.get_window.
    if window[2]:
        sig_c = data * win
        sig_a = data * win[::-1]
        E_plus = np.trapz(np.power(sig_c, 2)) * corr_s.stats.delta
        E_minus = np.trapz(np.power(sig_a, 2)) * corr_s.stats.delta
        # Second multiplication by the window: the data ends up weighted
        # by win**2.
        u_plus = sig_c * win
        u_minus = sig_a * win[::-1]
        adjt_src = 2. * (u_plus / E_plus - u_minus / E_minus)
        success = True
    else:
        # NaN array with the shape (and float type) of the window.
        adjt_src = win - win + np.nan
    return adjt_src, success
def get_correlation(trace1, trace2, wlen_samples, mlag_samples):
    """
    Average windowed cross-correlation of two sampled series.

    Both inputs are cut into consecutive, non-overlapping windows of
    ``wlen_samples`` samples. Each pair of windows is cross-correlated via
    ``fftconvolve`` (first window time-reversed, ``mode="same"``), the
    results are summed and finally divided by the number of windows.

    Parameters
    ----------
    trace1, trace2 : array-like
        Input series; they are sliced with the same indices.
    wlen_samples : int
        Window length in samples, must be positive.
    mlag_samples : int
        Maximum lag in samples; the output has ``2 * mlag_samples + 1``
        samples.

    Returns
    -------
    numpy.ndarray
        The mean correlation, centred with ``my_centered``.

    Raises
    ------
    ValueError
        If ``wlen_samples`` is not positive or if no window fits into
        ``trace1`` (previously this surfaced as an ``UnboundLocalError``).
    """
    if wlen_samples <= 0:
        # A non-positive step would never advance the loop below.
        raise ValueError("wlen_samples must be positive.")

    correlation = None
    nw = 0
    ix = 0
    # NOTE(review): the strict "<" skips a last window that would end exactly
    # on the final sample of trace1; kept as is so results do not change.
    while ix < len(trace1) - wlen_samples:
        win1 = trace1[ix:ix + wlen_samples]
        win2 = trace2[ix:ix + wlen_samples]
        wcorr = fftconvolve(win1[::-1], win2, mode="same")
        if correlation is None:
            # Allocate the accumulator once the output length is known.
            correlation = np.zeros(len(wcorr))
        correlation += wcorr
        nw += 1
        ix += wlen_samples

    if correlation is None:
        raise ValueError(
            "No window of {} samples fits into a trace of {} samples.".format(
                wlen_samples, len(trace1)))

    corr_len = 2 * mlag_samples + 1
    return my_centered(correlation, corr_len) / nw
def compute_correlation(input_files, all_conf, nsrc, all_ns, taper,
                        insta=False):
    """
    Compute noise cross-correlations from two .h5 'wavefield' files.

    Noise source distribution and spectrum is given by starting_model.h5
    It is assumed that noise sources are delta-correlated in space.

    Metainformation: Include the reference station names for both stations
    from wavefield files, if possible. Do not include geographic information
    from .csv file as this might be error-prone. Just add the geographic
    info later if needed.

    Parameters
    ----------
    input_files : pair
        One entry per station. Without ``insta`` each entry is passed to
        ``WaveField``. With ``insta`` each entry is indexed: ``[0]`` is
        returned as the station identifier, ``[2]`` is the geographic
        latitude and ``[3]`` the longitude.
    all_conf : object
        Only ``all_conf.config`` is used (keys ``'verbose'`` and, with
        ``insta``, ``'wavefield_path'`` and ``'wavefield_sampling_rate'``).
    nsrc : noise source object
        Provides ``src_loc`` (row 0 longitude, row 1 latitude as used below),
        ``get_spect(i)`` and ``surf_area``.
    all_ns : tuple
        ``(ntime, n, n_corr, Fs)``; ``n`` is the FFT length and ``n_corr``
        the length of the returned correlation.
    taper : numpy.ndarray
        Taper applied to the seismograms in ``insta`` mode. Stored
        wavefields are tapered with ``cosine_taper`` of their own ``nt``.
    insta : bool
        Use an instaseis database instead of stored wavefields.

    Returns
    -------
    tuple
        ``(correlation, station1, station2)``.

    Raises
    ------
    ValueError
        If the source grids of the wavefields and the noise source disagree.
    """
    wf1, wf2 = input_files
    ntime, n, n_corr, Fs = all_ns
    ntraces = nsrc.src_loc[0].shape[0]
    correlation = np.zeros(n_corr)

    # Wavefield files opened here; they are closed again in ``finally``.
    opened = []
    try:
        if insta:
            db = instaseis.open_db(all_conf.config['wavefield_path'])
            Fs = all_conf.config['wavefield_sampling_rate']
            # get receiver locations
            station1 = wf1[0]
            station2 = wf2[0]
            lat1 = geograph_to_geocent(float(wf1[2]))
            lon1 = float(wf1[3])
            rec1 = instaseis.Receiver(latitude=lat1, longitude=lon1)
            lat2 = geograph_to_geocent(float(wf2[2]))
            lon2 = float(wf2[3])
            rec2 = instaseis.Receiver(latitude=lat2, longitude=lon2)
            # taper1 / taper2 used to be defined only in the wavefield
            # branch, so this mode failed with a NameError. The taper
            # handed in by the caller is used for both stations.
            taper1 = taper2 = taper
        else:
            wf1 = WaveField(wf1)
            opened.append(wf1)
            taper1 = cosine_taper(wf1.stats["nt"])
            wf2 = WaveField(wf2)
            opened.append(wf2)
            taper2 = cosine_taper(wf2.stats["nt"])
            station1 = wf1.stats['reference_station']
            station2 = wf2.stats['reference_station']

            # Make sure all is consistent (spot check on the grid ends)
            if not np.all(wf1.sourcegrid[1, 0:10] == wf2.sourcegrid[1, 0:10]):
                raise ValueError("Wave fields not consistent.")
            if not np.all(wf1.sourcegrid[1, -10:] == wf2.sourcegrid[1, -10:]):
                raise ValueError("Wave fields not consistent.")
            if not np.all(wf1.sourcegrid[0, -10:] == nsrc.src_loc[0, -10:]):
                raise ValueError("Wave field and source not consistent.")

        # Loop over source locations
        print_each_n = max(5, round(max(ntraces // 5, 1), -1))
        for i in range(ntraces):
            # noise source spectrum at this location
            S = nsrc.get_spect(i)
            if S.sum() == 0.:
                # If amplitude is 0, continue. (Spectrum has 0 phase anyway.)
                continue

            if insta:
                # get source locations
                lat_src = geograph_to_geocent(nsrc.src_loc[1, i])
                lon_src = nsrc.src_loc[0, i]
                fsrc = instaseis.ForceSource(latitude=lat_src,
                                             longitude=lon_src, f_r=1.e12)
                s1 = db.get_seismograms(source=fsrc, receiver=rec1,
                                        dt=1. / Fs)[0].data * taper1
                s2 = db.get_seismograms(source=fsrc, receiver=rec2,
                                        dt=1. / Fs)[0].data * taper2
                s1 = np.ascontiguousarray(s1)
                s2 = np.ascontiguousarray(s2)
                spec1 = np.fft.rfft(s1, n)
                spec2 = np.fft.rfft(s2, n)
            elif not wf1.fdomain:
                # read Green's functions
                s1 = np.ascontiguousarray(wf1.data[i, :] * taper1)
                s2 = np.ascontiguousarray(wf2.data[i, :] * taper2)
                # Fourier transform for greater ease of convolution
                spec1 = np.fft.rfft(s1, n)
                spec2 = np.fft.rfft(s2, n)
            else:
                # Stored data is used directly as the spectra.
                spec1 = np.ascontiguousarray(wf1.data[i, :])
                spec2 = np.ascontiguousarray(wf2.data[i, :])

            # convolve G1G2
            g1g2_tr = np.multiply(np.conjugate(spec1), spec2)
            # convolve noise source (spectrum cut to the length of G1G2)
            c = np.multiply(g1g2_tr, S[:len(g1g2_tr)])
            # transform back, centre on zero lag, weight by surface area
            correlation += my_centered(np.fft.fftshift(np.fft.irfft(c, n)),
                                       n_corr) * nsrc.surf_area[i]

            # occasional info
            if i % print_each_n == 0 and all_conf.config['verbose']:
                print("Finished {} of {} source locations.".format(i, ntraces))
    finally:
        for wavefield in opened:
            wavefield.file.close()

    return (correlation, station1, station2)
def g1g2_kern(wf1str, wf2str, kernel, adjt, src, source_conf, insta=False):
    """
    Compute source sensitivity kernels for one station pair and save them.

    For every filter band the adjoint sources matching ``adjt`` are read,
    the correlation contribution of every source location is projected onto
    them, and the result is written to ``<kernel>.<band index>.npy`` (only
    if the kernel for that band is not identically zero).

    Parameters
    ----------
    wf1str, wf2str :
        Station inputs. Without ``insta`` they are passed to ``WaveField``.
        With ``insta`` they are indexed: ``[2]`` is the geographic latitude
        and ``[3]`` the longitude of the receiver.
    kernel : str
        Output file name stem.
    adjt : sequence of str
        Adjoint source file name prefixes; ``'*.<band index>.sac'`` is
        appended and globbed.
    src :
        Argument for ``NoiseSource``.
    source_conf : dict
        Needs ``'source_path'`` and ``'project_path'`` and, with ``insta``,
        ``'sampling_rate'``.
    insta : bool
        Use an instaseis database instead of stored wavefields.

    Returns
    -------
    tuple
        Always the empty tuple; returned early if a band has no adjoint
        source at all.

    Raises
    ------
    ValueError
        If ``bandpass`` in measr_config.yml is neither null nor a list, or
        if the source grids of the wavefields and the noise source disagree.
    """
    # Context managers make sure the configuration files are closed again.
    with open(os.path.join(source_conf['source_path'],
                           'measr_config.yml')) as fh:
        measr_conf = yaml.safe_load(fh)
    with open(os.path.join(source_conf['project_path'],
                           'config.yml')) as fh:
        conf = yaml.safe_load(fh)

    # One band for null or a flat list, several for a list of lists.
    bandpass = measr_conf['bandpass']
    if bandpass is None:
        filtcnt = 1
    elif isinstance(bandpass, list):
        filtcnt = len(bandpass) if isinstance(bandpass[0], list) else 1
    else:
        # Previously filtcnt stayed unbound and failed further down.
        raise ValueError("bandpass must be null, a list, or a list of lists.")

    ntime, n, n_corr, Fs = get_ns(wf1str, source_conf, insta)
    # use a one-sided taper: The seismogram probably has a non-zero end,
    # being cut off wherever the solver stopped running.
    taper = cosine_taper(ntime, p=0.01)
    taper[0:ntime // 2] = 1.0

    ########################################################################
    # Prepare filenames and adjoint sources
    ########################################################################
    filenames = []
    adjt_srcs = []
    for ix_f in range(filtcnt):
        filenames.append(kernel + '.{}.npy'.format(ix_f))
        f = Stream()
        for a in adjt:
            adjtfile = glob(a + '*.{}.sac'.format(ix_f))
            try:
                f += read(adjtfile[0])[0]
                f[-1].data = my_centered(f[-1].data, n_corr)
            except IndexError:
                warn('No adjoint source found: {}\n'.format(a))
        if len(f) > 0:
            adjt_srcs.append(f)
        else:
            return ()

    ########################################################################
    # Compute the kernels
    ########################################################################
    with NoiseSource(src) as nsrc:
        # Uniform spatial weights.
        nsrc.distr_basis = np.ones(nsrc.distr_basis.shape)
        ntraces = nsrc.src_loc[0].shape[0]

        # Wavefield files opened here; closed again in ``finally``.
        opened = []
        try:
            if insta:
                # open database
                db = instaseis.open_db(conf['wavefield_path'])
                dt = 1. / source_conf['sampling_rate']
                # get receiver locations. The station inputs are wf1str /
                # wf2str; the old code read the unbound locals wf1 / wf2.
                lat1 = geograph_to_geocent(float(wf1str[2]))
                lon1 = float(wf1str[3])
                rec1 = instaseis.Receiver(latitude=lat1, longitude=lon1)
                lat2 = geograph_to_geocent(float(wf2str[2]))
                lon2 = float(wf2str[3])
                rec2 = instaseis.Receiver(latitude=lat2, longitude=lon2)
            else:
                wf1 = WaveField(wf1str)
                opened.append(wf1)
                wf2 = WaveField(wf2str)
                opened.append(wf2)
                # Make sure all is consistent (spot check on the grid ends)
                if not np.all(wf1.sourcegrid[1, 0:10]
                              == wf2.sourcegrid[1, 0:10]):
                    raise ValueError("Wave fields not consistent.")
                if not np.all(wf1.sourcegrid[1, -10:]
                              == wf2.sourcegrid[1, -10:]):
                    raise ValueError("Wave fields not consistent.")
                if not np.all(wf1.sourcegrid[0, -10:]
                              == nsrc.src_loc[0, -10:]):
                    raise ValueError("Wave field and source not consistent.")

            kern = np.zeros((filtcnt, ntraces, len(adjt)))

            # Loop over locations
            print_each_n = max(5, round(max(ntraces // 5, 1), -1))
            for i in range(ntraces):
                # noise source spectrum at this location
                # For the kernel, this contains only the basis functions of
                # the spectrum without weights; might still be
                # location-dependent, for example when constraining
                # sensitivity to ocean
                S = nsrc.get_spect(i)
                if S.sum() == 0.:
                    # The spectrum has 0 phase so only checking
                    # absolute value here
                    continue

                if insta:
                    # get source locations
                    lat_src = geograph_to_geocent(nsrc.src_loc[1, i])
                    lon_src = nsrc.src_loc[0, i]
                    fsrc = instaseis.ForceSource(latitude=lat_src,
                                                 longitude=lon_src,
                                                 f_r=1.e12)
                    s1 = db.get_seismograms(source=fsrc, receiver=rec1,
                                            dt=dt)[0].data * taper
                    s1 = np.ascontiguousarray(s1)
                    s2 = db.get_seismograms(source=fsrc, receiver=rec2,
                                            dt=dt)[0].data * taper
                    s2 = np.ascontiguousarray(s2)
                else:
                    s1 = np.ascontiguousarray(wf1.data[i, :] * taper)
                    s2 = np.ascontiguousarray(wf2.data[i, :] * taper)

                spec1 = np.fft.rfft(s1, n)
                spec2 = np.fft.rfft(s2, n)
                g1g2_tr = np.multiply(np.conjugate(spec1), spec2)
                c = np.multiply(g1g2_tr, S)

                # Kernel at that location: correlation centred on zero lag
                corr_temp = my_centered(np.fft.fftshift(np.fft.irfft(c, n)),
                                        n_corr)

                # Apply the 'adjoint source'. adjt_srcs holds exactly one
                # non-empty stream per band (see the early return above).
                for ix_f, f in enumerate(adjt_srcs):
                    for j, tr in enumerate(f):
                        kern[ix_f, i, j] = (np.dot(corr_temp, tr.data)
                                            * tr.stats.delta)

                if i % print_each_n == 0 and conf['verbose']:
                    print("Finished {} of {} source locations.".format(
                        i, ntraces))
        finally:
            for wavefield in opened:
                wavefield.file.close()

    # Save only the bands whose kernel is not identically zero.
    for ix_f, filename in enumerate(filenames):
        if kern[ix_f, :, :].sum() != 0:
            np.save(filename, kern[ix_f, :, :])
    return ()
def measurement(source_config, mtype, step, ignore_net, ix_bandpass,
                bandpass, step_test, taper_perc, **options):
    """
    Get measurements on noise correlation data and synthetics.

    For every observed correlation a matching synthetic is looked up, both
    are optionally tapered and band-pass filtered, the measurement and its
    adjoint source are evaluated, one row is added to the result table and
    the adjoint source is written to ``<step_dir>/adjt`` in SAC format.

    Parameters
    ----------
    source_config : dict
        Needs ``'project_path'`` (location of config.yml) and
        ``'source_path'``.
    mtype : str
        Measurement type, passed to ``rm.get_measure_func`` and
        ``am.get_adj_func``.
    step : int
        Iteration number; selects the ``iteration_<step>`` directory.
    ignore_net : bool
        Forwarded to ``get_synthetics_filename`` as ``ignore_network``.
    ix_bandpass : int
        Index of the frequency band, used in the adjoint source file names.
    bandpass : sequence or None
        ``[freqmin, freqmax, corners]``; None disables taper and filter.
    step_test : bool
        If true, observations are read from ``<step_dir>/obs_slt`` instead
        of ``<source_path>/observed_correlations``.
    taper_perc : float
        Taper length in percent (divided by 100 before use).
    options:
        g_speed,window_params (only needed if mtype is ln_energy_ratio
        or energy_diff). NOTE: ``options['window_params']['causal_side']``
        is set to True on the dict that was passed in.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed station pair.

    Raises
    ------
    ValueError
        If the observation directory is empty or an unexpected number of
        adjoint sources was produced.
    """
    # Context manager so that the configuration file is closed again.
    with open(os.path.join(source_config['project_path'],
                           'config.yml')) as fh:
        verbose = yaml.safe_load(fh)['verbose']

    step_n = 'iteration_{}'.format(int(step))
    step_dir = os.path.join(source_config['source_path'], step_n)

    if step_test:
        corr_dir = os.path.join(step_dir, 'obs_slt')
    else:
        corr_dir = os.path.join(source_config['source_path'],
                                'observed_correlations')

    files = [os.path.join(corr_dir, f) for f in os.listdir(corr_dir)]
    synth_dir = os.path.join(step_dir, 'corr')

    columns = [
        'sta1', 'sta2', 'lat1', 'lon1', 'lat2', 'lon2', 'dist', 'az', 'baz',
        'syn', 'syn_a', 'obs', 'obs_a', 'l2_norm', 'snr', 'snr_a', 'nstack'
    ]
    measurements = pd.DataFrame(columns=columns)

    options['window_params']['causal_side'] = True  # relic for signal to noise
    # Second option set with the opposite side, used for 'snr_a'.
    _options_ac = copy.deepcopy(options)
    _options_ac['window_params']['causal_side'] = (
        not (options['window_params']['causal_side']))

    if not files:
        msg = 'No input found!'
        raise ValueError(msg)

    for i, f in enumerate(files):
        # Read data
        try:
            tr_o = read(f)[0]
        except IOError:
            if verbose:
                print('\nCould not read data: ' + os.path.basename(f))
            continue

        # Read synthetics
        synth_filename = get_synthetics_filename(os.path.basename(f),
                                                 synth_dir,
                                                 ignore_network=ignore_net)
        if synth_filename is None:
            continue

        try:
            tr_s = read(synth_filename)[0]
        except IOError:
            if verbose:
                print('\nCould not read synthetics: ' + synth_filename)
            continue

        # Assigning stats to synthetics, cutting them to right length
        tr_s.stats.sac = tr_o.stats.sac.copy()
        tr_s.data = my_centered(tr_s.data, tr_o.stats.npts)
        # Get all the necessary information
        info = get_station_info(tr_o.stats)

        # Collect the adjoint source
        adjoint_source = Stream()
        adjoint_source += Trace()
        adjoint_source[0].stats.sampling_rate = tr_s.stats.sampling_rate
        adjoint_source[0].stats.sac = tr_s.stats.sac.copy()

        # Filter
        if bandpass is not None:
            tr_o.taper(taper_perc / 100.)
            tr_o.filter('bandpass', freqmin=bandpass[0], freqmax=bandpass[1],
                        corners=bandpass[2], zerophase=True)
            tr_s.taper(taper_perc / 100.)
            tr_s.filter('bandpass', freqmin=bandpass[0], freqmax=bandpass[1],
                        corners=bandpass[2], zerophase=True)

        # Weight observed stack by nstack
        tr_o.data /= tr_o.stats.sac.user0

        # Take the measurement
        func = rm.get_measure_func(mtype)
        msr_o = func(tr_o, **options)
        msr_s = func(tr_s, **options)

        # Get the adjoint source
        adjt_func = am.get_adj_func(mtype)
        adjt, success = adjt_func(tr_o, tr_s, **options)
        if not success:
            continue

        # timeseries-like measurements:
        if mtype in ['square_envelope', 'full_waveform',
                     'windowed_waveform']:
            l2_so = 0.5 * np.sum(np.power((msr_s - msr_o), 2))
            snr = snratio(tr_o, **options)
            snr_a = snratio(tr_o, **_options_ac)
            info.extend([
                np.nan, np.nan, np.nan, np.nan, l2_so, snr, snr_a,
                tr_o.stats.sac.user0
            ])
            adjoint_source[0].data = adjt

        # single value measurements:
        else:
            if mtype == 'energy_diff':
                # Two values per trace, one for each branch.
                l2_so = 0.5 * (msr_s - msr_o)**2
                msr = msr_o[0]
                msr_a = msr_o[1]
                snr = snratio(tr_o, **options)
                snr_a = snratio(tr_o, **_options_ac)
                l2 = l2_so.sum()
                info.extend([
                    msr_s[0], msr_s[1], msr, msr_a, l2, snr, snr_a,
                    tr_o.stats.sac.user0
                ])

                # One adjoint source per branch, scaled by its residual.
                adjoint_source += adjoint_source[0].copy()
                for ix_branch in range(2):
                    adjoint_source[ix_branch].data = adjt[ix_branch]
                    adjoint_source[ix_branch].data *= (msr_s[ix_branch]
                                                       - msr_o[ix_branch])

            elif mtype == 'ln_energy_ratio':
                l2_so = 0.5 * (msr_s - msr_o)**2
                msr = msr_o
                snr = snratio(tr_o, **options)
                snr_a = snratio(tr_o, **_options_ac)
                info.extend([
                    msr_s, np.nan, msr, np.nan, l2_so, snr, snr_a,
                    tr_o.stats.sac.user0
                ])
                adjt *= (msr_s - msr_o)
                adjoint_source[0].data = adjt

        measurements.loc[i] = info

        # save the adjoint source
        # Remove a trailing "sac" as a whole suffix. str.rstrip('sac') strips
        # any run of the characters 's', 'a', 'c' and could eat into the
        # actual file name.
        synth_base = os.path.basename(synth_filename)
        if synth_base.endswith('sac'):
            stem = synth_base[:-len('sac')]
        else:
            stem = synth_base

        if len(adjoint_source) == 1:
            adjt_filename = stem + '{}.sac'.format(ix_bandpass)
            adjoint_source[0].write(os.path.join(step_dir, 'adjt',
                                                 adjt_filename),
                                    format='SAC')
        elif len(adjoint_source) == 2:
            for ix_branch, branch in enumerate(['c', 'a']):
                adjt_filename = stem + '{}.{}.sac'.format(branch,
                                                          ix_bandpass)
                adjoint_source[ix_branch].write(os.path.join(
                    step_dir, 'adjt', adjt_filename), format='SAC')
        else:
            raise ValueError("Some problem with adjoint sources.")

    return measurements
def g1g2_corr(wf1, wf2, corr_file, src, source_conf, insta=False):
    """
    Compute noise cross-correlations from two .h5 'wavefield' files.

    Noise source distribution and spectrum is given by starting_model.h5
    It is assumed that noise sources are delta-correlated in space.

    Metainformation: Include the reference station names for both stations
    from wavefield files, if possible. Do not include geographic information
    from .csv file as this might be error-prone. Just add the geographic
    info later if needed.

    Parameters
    ----------
    wf1, wf2 :
        Station inputs. Without ``insta`` they are passed to ``WaveField``.
        With ``insta`` they are indexed: ``[2]`` is the geographic latitude
        and ``[3]`` the longitude of the receiver.
    corr_file : str
        Name of the SAC file the correlation is written to.
    src :
        Argument for ``NoiseSource``.
    source_conf : dict
        Needs ``'project_path'`` (location of config.yml); also passed on
        to ``get_ns``.
    insta : bool
        Use an instaseis database instead of stored wavefields.

    Returns
    -------
    None
        The result is written to ``corr_file``.

    Raises
    ------
    ValueError
        If the source grids of the wavefields and the noise source disagree.
    """
    with open(os.path.join(source_conf['project_path'], 'config.yml')) as fh:
        conf = yaml.safe_load(fh)

    with NoiseSource(src) as nsrc:
        ntime, n, n_corr, Fs = get_ns(wf1, source_conf, insta)

        # use a one-sided taper: The seismogram probably has a non-zero end,
        # being cut off wherever the solver stopped running.
        taper = cosine_taper(ntime, p=0.01)
        taper[0:ntime // 2] = 1.0
        ntraces = nsrc.src_loc[0].shape[0]
        correlation = np.zeros(n_corr)

        # Wavefield files opened here; closed again in ``finally``.
        opened = []
        try:
            if insta:
                # open database
                db = instaseis.open_db(conf['wavefield_path'])
                # get receiver locations
                lat1 = geograph_to_geocent(float(wf1[2]))
                lon1 = float(wf1[3])
                rec1 = instaseis.Receiver(latitude=lat1, longitude=lon1)
                lat2 = geograph_to_geocent(float(wf2[2]))
                lon2 = float(wf2[3])
                rec2 = instaseis.Receiver(latitude=lat2, longitude=lon2)
            else:
                wf1 = WaveField(wf1)
                opened.append(wf1)
                wf2 = WaveField(wf2)
                opened.append(wf2)
                # Make sure all is consistent (spot check on the grid ends)
                if not np.all(wf1.sourcegrid[1, 0:10]
                              == wf2.sourcegrid[1, 0:10]):
                    raise ValueError("Wave fields not consistent.")
                if not np.all(wf1.sourcegrid[1, -10:]
                              == wf2.sourcegrid[1, -10:]):
                    raise ValueError("Wave fields not consistent.")
                if not np.all(wf1.sourcegrid[0, -10:]
                              == nsrc.src_loc[0, -10:]):
                    raise ValueError("Wave field and source not consistent.")

            # Loop over source locations
            print_each_n = max(5, round(max(ntraces // 5, 1), -1))
            for i in range(ntraces):
                # noise source spectrum at this location
                S = nsrc.get_spect(i)
                if S.sum() == 0.:
                    # If amplitude is 0, continue.
                    # (Spectrum has 0 phase anyway.)
                    continue

                if insta:
                    # get source locations
                    lat_src = geograph_to_geocent(nsrc.src_loc[1, i])
                    lon_src = nsrc.src_loc[0, i]
                    fsrc = instaseis.ForceSource(latitude=lat_src,
                                                 longitude=lon_src,
                                                 f_r=1.e12)
                    # Also becomes the sampling rate of the output trace.
                    Fs = conf['wavefield_sampling_rate']
                    s1 = db.get_seismograms(source=fsrc, receiver=rec1,
                                            dt=1. / Fs)[0].data * taper
                    s2 = db.get_seismograms(source=fsrc, receiver=rec2,
                                            dt=1. / Fs)[0].data * taper
                    s1 = np.ascontiguousarray(s1)
                    s2 = np.ascontiguousarray(s2)
                    # The spectra were never computed in this branch, so
                    # the product below failed with a NameError.
                    spec1 = np.fft.rfft(s1, n)
                    spec2 = np.fft.rfft(s2, n)
                elif not wf1.fdomain:
                    # read Green's functions
                    s1 = np.ascontiguousarray(wf1.data[i, :] * taper)
                    s2 = np.ascontiguousarray(wf2.data[i, :] * taper)
                    # Fourier transform for greater ease of convolution
                    spec1 = np.fft.rfft(s1, n)
                    spec2 = np.fft.rfft(s2, n)
                else:
                    # Frequency-domain wavefield: the stored data is used
                    # directly as the spectra. This branch used to be a bare
                    # ``pass`` that left spec1 / spec2 undefined or stale.
                    spec1 = np.ascontiguousarray(wf1.data[i, :])
                    spec2 = np.ascontiguousarray(wf2.data[i, :])

                # convolve G1G2
                g1g2_tr = np.multiply(np.conjugate(spec1), spec2)
                # convolve noise source
                c = np.multiply(g1g2_tr, S)
                # transform back. fftshift moves the zero-lag sample to
                # index n // 2; for even n this equals the ifftshift used
                # before, for odd n ifftshift is off by one sample.
                correlation += my_centered(
                    np.fft.fftshift(np.fft.irfft(c, n)),
                    n_corr) * nsrc.surf_area[i]

                # occasional info
                if i % print_each_n == 0 and conf['verbose']:
                    print("Finished {} of {} source locations.".format(
                        i, ntraces))
            # end of loop over all source locations
        finally:
            for wavefield in opened:
                wavefield.file.close()

    # save output
    trace = Trace()
    trace.stats.sampling_rate = Fs
    trace.data = correlation

    # try to add some meta data (best effort). In insta mode wf1 / wf2 are
    # plain sequences without ``stats``, hence AttributeError is skipped too.
    try:
        sta1 = wf1.stats['reference_station']
        sta2 = wf2.stats['reference_station']
        trace.stats.station = sta1.split('.')[1]
        trace.stats.network = sta1.split('.')[0]
        trace.stats.location = sta1.split('.')[2]
        trace.stats.channel = sta1.split('.')[3]
        trace.stats.sac = {}
        trace.stats.sac['kuser0'] = sta2.split('.')[1]
        trace.stats.sac['kuser1'] = sta2.split('.')[0]
        trace.stats.sac['kuser2'] = sta2.split('.')[2]
        trace.stats.sac['kevnm'] = sta2.split('.')[3]
    except (KeyError, IndexError, AttributeError):
        pass

    trace.write(filename=corr_file, format='SAC')
def compute_kernel(input_files, all_conf, nsrc, all_ns, taper, insta=False):
    """
    Compute the source sensitivity kernel for one station pair.

    Parameters
    ----------
    input_files : tuple
        ``(wf1, wf2, adjt)``. ``wf1`` / ``wf2`` are passed to ``WaveField``
        or, with ``insta``, indexed for the receiver latitude (``[2]``) and
        longitude (``[3]``). ``adjt`` holds adjoint source file name
        prefixes.
    all_conf : object
        ``filtcnt``, ``config`` and (with ``insta``) ``source_config`` are
        read.
    nsrc : noise source object
        Its ``distr_basis`` is overwritten with ones.
    all_ns : tuple
        ``(ntime, n, n_corr, Fs)``.
    taper : numpy.ndarray
        Taper multiplied onto the time-domain Green's functions.
    insta : bool
        Use an instaseis database instead of stored wavefields.

    Returns
    -------
    numpy.ndarray or None
        Array indexed ``[spectral basis, filter, source location, adjoint
        source]``; None if a filter band has no adjoint source at all.
    """
    ntime, n, n_corr, Fs = all_ns
    wf1, wf2, adjt = input_files
    n_filters = all_conf.filtcnt

    # ------------------------------------------------------------------
    # Adjoint sources: one stream per filter band
    # ------------------------------------------------------------------
    adjoint_streams = []
    for ix_f in range(n_filters):
        stream = Stream()
        for prefix in adjt:
            matches = glob(prefix + '*.{}.sac'.format(ix_f))
            try:
                stream += read(matches[0])[0]
                stream[-1].data = my_centered(stream[-1].data, n_corr)
            except IndexError:
                if all_conf.config['verbose']:
                    print('No adjoint source found: {}\n'.format(prefix))
        if len(stream) == 0:
            return None
        adjoint_streams.append(stream)

    # Uniform spatial weights. (current model is in the adjoint source)
    nsrc.distr_basis = np.ones(nsrc.distr_basis.shape)
    ntraces = nsrc.src_loc[0].shape[0]
    n_basis = nsrc.spect_basis.shape[0]

    # ------------------------------------------------------------------
    # Green's function provider: instaseis database or wavefield files
    # ------------------------------------------------------------------
    if insta:
        db = instaseis.open_db(all_conf.config['wavefield_path'])
        rec1 = instaseis.Receiver(
            latitude=geograph_to_geocent(float(wf1[2])),
            longitude=float(wf1[3]))
        rec2 = instaseis.Receiver(
            latitude=geograph_to_geocent(float(wf2[2])),
            longitude=float(wf2[3]))
    else:
        wavefield1 = WaveField(wf1)
        wavefield2 = WaveField(wf2)
        # Spot check that both wavefields and the source share one grid.
        if False in (wavefield1.sourcegrid[1, 0:10]
                     == wavefield2.sourcegrid[1, 0:10]):
            raise ValueError("Wave fields not consistent.")
        if False in (wavefield1.sourcegrid[1, -10:]
                     == wavefield2.sourcegrid[1, -10:]):
            raise ValueError("Wave fields not consistent.")
        if False in (wavefield1.sourcegrid[0, -10:]
                     == nsrc.src_loc[0, -10:]):
            raise ValueError("Wave field and source not consistent.")

    kern = np.zeros((n_basis, n_filters, ntraces, len(adjt)))

    # ------------------------------------------------------------------
    # Loop over source locations
    # ------------------------------------------------------------------
    print_each_n = max(5, round(max(ntraces // 5, 1), -1))
    for i_src in range(ntraces):
        # Only used to skip locations whose spectrum sums to zero; the
        # kernel itself is built from the spectral basis functions below.
        spectrum = nsrc.get_spect(i_src)
        if spectrum.sum() == 0.:
            continue

        if insta:
            fsrc = instaseis.ForceSource(
                latitude=geograph_to_geocent(nsrc.src_loc[1, i_src]),
                longitude=nsrc.src_loc[0, i_src],
                f_r=1.e12)
            dt = 1. / all_conf.source_config['sampling_rate']
            seis1 = db.get_seismograms(source=fsrc, receiver=rec1,
                                       dt=dt)[0].data * taper
            seis1 = np.ascontiguousarray(seis1)
            seis2 = db.get_seismograms(source=fsrc, receiver=rec2,
                                       dt=dt)[0].data * taper
            seis2 = np.ascontiguousarray(seis2)
            spec1 = np.fft.rfft(seis1, n)
            spec2 = np.fft.rfft(seis2, n)
        elif not wavefield1.fdomain:
            seis1 = np.ascontiguousarray(wavefield1.data[i_src, :] * taper)
            seis2 = np.ascontiguousarray(wavefield2.data[i_src, :] * taper)
            spec1 = np.fft.rfft(seis1, n)
            spec2 = np.fft.rfft(seis2, n)
        else:
            # Stored data is used directly as the spectra.
            spec1 = wavefield1.data[i_src, :]
            spec2 = wavefield2.data[i_src, :]

        g1g2_tr = np.multiply(np.conjugate(spec1), spec2)

        for ix_spec in range(n_basis):
            weighted = np.multiply(g1g2_tr, nsrc.spect_basis[ix_spec, :])
            # Correlation for this basis function, centred on zero lag.
            corr_temp = my_centered(
                np.fft.fftshift(np.fft.irfft(weighted, n)), n_corr)

            # Project onto every adjoint source of every filter band.
            for ix_f in range(n_filters):
                stream = adjoint_streams[ix_f]
                if stream is None:
                    continue
                for j, tr in enumerate(stream):
                    kern[ix_spec, ix_f, i_src, j] = (
                        np.dot(corr_temp, tr.data) * tr.stats.delta)

        if i_src % print_each_n == 0 and all_conf.config['verbose']:
            print("Finished {} of {} source locations.".format(i_src,
                                                               ntraces))

    if not insta:
        wavefield1.file.close()
        wavefield2.file.close()
    return kern