def read_multiplet(filename, idx, return_tp=False, db_path=cfg.dbpath, db_path_T='template_db_1/', db_path_M='matched_filter_1/'):
    """Read one detection from a multiplet file pair and return it as a Stream.

    Two HDF5 files are read: ``<filename>meta.h5`` (metadata) and
    ``<filename>wav.h5`` (waveforms), both located in ``db_path + db_path_M``.
    The template referenced by the first entry of the ``template_id`` dataset
    is read from ``db_path + db_path_T``.

    Parameters
    ----------
    filename : str
        Common prefix of the ``meta.h5`` / ``wav.h5`` file pair.
    idx : int
        Index of the detection along the first axis of the ``waveforms``,
        ``origin_times`` and ``correlation_coefficients`` datasets.
    return_tp : bool, optional
        If True, also return the template object.
    db_path : str, optional
        Root directory of the database.
    db_path_T : str, optional
        Sub-directory holding the templates.
    db_path_M : str, optional
        Sub-directory holding the matched-filter output.

    Returns
    -------
    S : Stream
        One trace per (station, component) pair, all starting at the
        detection's origin time.  Template metadata (moveouts, location,
        indexes), the correlation coefficient of the detection and the
        station / component lists are attached as attributes.
    T : template object
        Only returned when ``return_tp`` is True.
    """
    meta_path = db_path + db_path_M + filename + 'meta.h5'
    wave_path = db_path + db_path_M + filename + 'wav.h5'
    # Context managers guarantee that both files are closed even if one of
    # the reads below raises (missing dataset, idx out of range, ...).
    with h5.File(meta_path, 'r') as fm:
        T = read_template('template{:d}'.format(fm['template_id'][0]), db_path=db_path + db_path_T)
        stations = fm['stations'][:].astype('U')
        components = fm['components'][:].astype('U')
        date = udt(fm['origin_times'][idx])
        corr = fm['correlation_coefficients'][idx]
    with h5.File(wave_path, 'r') as fw:
        # the dataset is indexed as (detection, station, component, sample)
        waveforms = fw['waveforms'][idx, :, :, :]
    #---------------------------------
    S = Stream()
    for s, station in enumerate(stations):
        for c, component in enumerate(components):
            trace = Trace(data=waveforms[s, c, :])
            trace.stats['station'] = station
            trace.stats['channel'] = component
            trace.stats['sampling_rate'] = cfg.sampling_rate
            trace.stats.starttime = date
            S += trace
    #---------------------------------
    S.s_moveouts = T.metadata['s_moveouts']
    S.p_moveouts = T.metadata['p_moveouts']
    S.source_idx = T.metadata['source_idx']
    S.template_ID = T.metadata['template_idx']
    S.latitude = T.metadata['latitude']
    S.longitude = T.metadata['longitude']
    S.depth = T.metadata['depth']
    S.corr = corr
    S.stations = stations.tolist()
    S.components = components.tolist()
    if return_tp:
        return S, T
    return S
def SVDWF_multiplets_bulk(template_id, db_path=autodet.cfg.dbpath, db_path_M='matched_filter_1/', db_path_T='template_db_1/',
                          WAVEFORMS=None, best=False, normRMS=True,
                          n_singular_values=5, max_freq=autodet.cfg.max_freq, attach_raw_data=False):
    """Stack the detections of one template after filtering them with SVDWF.

    The detections are looked up in every ``*multiplets_*meta.h5`` file of
    ``db_path + db_path_M`` that contains a group named ``str(template_id)``;
    the matching ``wav.h5`` files provide the waveforms.  Each
    (station, component) pair is passed through ``SVDWF`` (helper defined
    elsewhere; presumably SVD + Wiener filtering, judging by its name) and
    the filtered detections are averaged into one trace.

    Parameters
    ----------
    template_id : int
        Template whose detections are processed.
    db_path, db_path_M, db_path_T : str, optional
        Database root, matched-filter sub-directory, template sub-directory.
    WAVEFORMS : numpy.ndarray, optional
        Pre-loaded waveforms, reshaped internally to
        (n_detections, n_stations, n_components, n_samples).  When given,
        nothing is read from the ``wav.h5`` files.  If ``normRMS`` is True
        the array is normalized IN PLACE.
    best : bool, optional
        If True, only keep detections whose correlation coefficient is
        strictly above a threshold that depends on the number of detections
        (best 100 / 30% / 50% / 66%, or everything above 0).
    normRMS : bool, optional
        If True, divide every trace by its standard deviation (traces with a
        zero standard deviation are left untouched).
    n_singular_values : int, optional
        Forwarded to ``SVDWF``.
    max_freq : float, optional
        Forwarded to ``SVDWF``.
    attach_raw_data : bool, optional
        If True, attach the unfiltered waveforms as ``S.raw_data``.

    Returns
    -------
    S : Stream or None
        One stacked trace per (station, component), each normalized by its
        maximum absolute amplitude.  ``S.data`` holds the filtered
        detections and ``S.Nmulti`` the number of detections used.  None is
        returned when no detection is available.
    """
    from obspy import Stream, Trace
    #-----------------------------------------------------------------------------------------------
    T = autodet.db_h5py.read_template('template{:d}'.format(template_id), db_path=db_path+db_path_T)
    #-----------------------------------------------------------------------------------------------
    meta_files = glob.glob(db_path + db_path_M + '*multiplets_*meta.h5')
    tid_str = str(template_id)
    prefixes = []   # file names stripped from their 'meta.h5' suffix
    CC = []
    t1 = give_time()
    for meta_file in meta_files:
        with h5.File(meta_file, mode='r') as f:
            if tid_str in f.keys():
                prefixes.append(meta_file[:-len('meta.h5')])
                CC.extend(f[tid_str]['correlation_coefficients'][()].tolist())
    CC = np.float32(CC)
    t2 = give_time()
    print('{:.2f} s to retrieve the correlation coefficients.'.format(t2-t1))
    if len(prefixes) == 0:
        print("None multiplet for template {:d} !! Return None".format(template_id))
        return None
    #------------------------------
    S = Stream()
    with h5.File(prefixes[0] + 'meta.h5', mode='r') as f:
        S.stations = f[tid_str]['stations'][()].astype('U').tolist()
        S.components = f[tid_str]['components'][()].astype('U').tolist()
    ns = len(S.stations)
    nc = len(S.components)
    S.latitude = T.metadata['latitude']
    S.longitude = T.metadata['longitude']
    S.depth = T.metadata['depth']
    #----------------------------------------------
    supplied = WAVEFORMS is not None
    if not supplied:
        CC_thres = 0.
        if best:
            CC = np.sort(CC)
            if len(CC) > 300:
                CC_thres = CC[-101]                       # the best 100 detections
            elif len(CC) > 70:
                CC_thres = CC[int(7./10.*len(CC))]        # the best 30%
            elif len(CC) > 30:
                CC_thres = np.median(CC)                  # the best 50%
            elif len(CC) > 10:
                CC_thres = np.percentile(CC, 33.)         # the best 66% detections
        chunks = []
        t1 = give_time()
        for prefix in prefixes:
            if best:
                with h5.File(prefix + 'meta.h5', mode='r') as fm:
                    selection = np.where(fm[tid_str]['correlation_coefficients'][:] > CC_thres)[0]
                if selection.size == 0:
                    continue
                with h5.File(prefix + 'wav.h5', mode='r') as fw:
                    chunks.append(fw[tid_str]['waveforms'][selection, :, :, :])
            else:
                with h5.File(prefix + 'wav.h5', mode='r') as fw:
                    chunks.append(fw[tid_str]['waveforms'][()])
        t2 = give_time()
        print('{:.2f} s to retrieve the waveforms.'.format(t2-t1))
        if not chunks:
            # e.g. every correlation coefficient equals the threshold: there
            # is nothing to stack, and np.concatenate would raise on [].
            print("No multiplet above the CC threshold for template {:d} !! Return None".format(template_id))
            return None
        WAVEFORMS = np.concatenate(chunks, axis=0)
    if normRMS:
        # Per-trace standard deviation, computed for all traces at once.
        # Traces with a zero standard deviation are divided by 1, i.e. kept.
        norm = np.std(WAVEFORMS, axis=-1, keepdims=True)
        norm[norm == 0.] = 1.
        WAVEFORMS /= norm   # in place: a caller-supplied array is modified too
    if supplied:
        # work on a copy from here on so that S.raw_data does not alias the
        # caller's array
        WAVEFORMS = np.array(WAVEFORMS)
    WAVEFORMS = WAVEFORMS.reshape(-1, ns, nc, WAVEFORMS.shape[-1])
    # Defined for both code paths (it used to be undefined when WAVEFORMS
    # was supplied by the caller).
    Nmulti = WAVEFORMS.shape[0]
    print(WAVEFORMS.shape)
    filtered_data = np.zeros_like(WAVEFORMS)
    for s in range(ns):
        for c in range(nc):
            filtered_data[:,s,c,:] = SVDWF(WAVEFORMS[:,s,c,:], n_singular_values, max_freq=max_freq)
            mean = np.mean(filtered_data[:,s,c,:], axis=0)
            peak = np.abs(mean).max()
            if peak != 0.:
                # skip the normalization of dead channels instead of
                # producing a trace full of NaNs
                mean /= peak
            trace = Trace(data=mean)
            trace.stats.station = S.stations[s]
            trace.stats.channel = S.components[c]
            trace.stats.sampling_rate = autodet.cfg.sampling_rate
            S += trace
    S.data = filtered_data
    if attach_raw_data:
        S.raw_data = WAVEFORMS
    S.Nmulti = Nmulti
    return S
def spectral_filtering_detections(tid, db_path_T='template_db_1/', db_path_M='matched_filter_1/', db_path=autodet.cfg.dbpath, SNR_thres=5., WAVEFORMS=None, normRMS=True, best=True):
    """Stack the detections of template ``tid`` after spectral filtering.

    The files matching ``db_path + db_path_M + '*multiplets<tid>_*'`` are
    scanned: files ending with ``wav.h5`` provide the waveforms, the other
    ones are read as metadata files.  For every (station, component) pair
    the detections are passed through ``spectral_filtering`` (helper defined
    elsewhere) and the result is summed over the detections.

    Parameters
    ----------
    tid : int
        Template whose detections are processed.
    db_path_T, db_path_M, db_path : str, optional
        Template sub-directory, matched-filter sub-directory, database root.
    SNR_thres : float, optional
        Forwarded to ``spectral_filtering``.
    WAVEFORMS : numpy.ndarray, optional
        Pre-loaded waveforms indexed as
        (detection, station, component, sample).  When given, no waveform is
        read from disk.  If ``normRMS`` is True the array is normalized
        IN PLACE.
    normRMS : bool, optional
        If True, divide every trace by its standard deviation (traces with a
        zero standard deviation are left untouched).
    best : bool, optional
        If True, only keep detections whose correlation coefficient is
        strictly above a threshold that depends on the number of detections
        (best 100 / 30% / 50% / 66%, or everything above 0).

    Returns
    -------
    filtered_waveforms : numpy.ndarray or None
        float32 array of shape (n_stations, n_components, n_samples), or
        None when no usable file pair is found.
    """
    import glob
    #-----------------------------------------------------------------------------------------------
    # The template is not needed to build the stack; the read is kept so
    # that a missing or unreadable template still fails here as it used to.
    autodet.db_h5py.read_template('template{:d}'.format(tid), db_path=db_path+db_path_T)
    #-----------------------------------------------------------------------------------------------
    print("Looking for {}{:d}_*".format(db_path + db_path_M + '*multiplets', tid))
    # glob replaces the former `ls` run through a shell: no subprocess is
    # left behind and the paths are never interpreted by a shell.
    files = sorted(glob.glob(db_path + db_path_M + '*multiplets{:d}_*'.format(tid)))
    #------------- retrieve metadata ---------------
    Nsamp = 0
    ns = 0
    nc = 0
    for path in files:
        if path.endswith('wav.h5'):
            if Nsamp == 0:
                with h5.File(path, mode='r') as fw:
                    # only the shape is needed: do not load the dataset
                    Nsamp = fw['waveforms'].shape[-1]
        else:
            with h5.File(path, mode='r') as fm:
                if len(fm['origin_times']) != 0:
                    nc = len(fm['components'][()])
                    ns = len(fm['stations'][()])
        if ns != 0 and Nsamp != 0:
            break
    else:
        # the file list was exhausted before both dimensions were known
        print("None multiplet for template {:d} !! Return None".format(tid))
        return None
    #----------------------------------------------
    if WAVEFORMS is None:
        CC_thres = 0.
        if best:
            cc_chunks = [np.zeros(0, dtype=np.float32)]
            for path in files:
                if not path.endswith('meta.h5'):
                    continue
                with h5.File(path, mode='r') as fm:
                    if len(fm['correlation_coefficients']) != 0:
                        cc_chunks.append(fm['correlation_coefficients'][:])
            CC = np.sort(np.hstack(cc_chunks))
            if len(CC) > 300:
                CC_thres = CC[-101]                       # the best 100 detections
            elif len(CC) > 70:
                CC_thres = CC[int(7./10.*len(CC))]        # the best 30%
            elif len(CC) > 30:
                CC_thres = np.median(CC)                  # the best 50%
            elif len(CC) > 10:
                CC_thres = np.percentile(CC, 33.)         # the best 66% detections
        # The empty float32 seed keeps the output shape and dtype promotion
        # identical to the former incremental np.vstack, including the case
        # where no detection is selected.
        chunks = [np.zeros((0, ns, nc, Nsamp), dtype=np.float32)]
        for path in files:
            if not path.endswith('wav.h5'):
                continue
            with h5.File(path, mode='r') as fw:
                if len(fw['waveforms']) == 0:
                    continue
                if best:
                    with h5.File(path[:-len('wav.h5')] + 'meta.h5', mode='r') as fm:
                        selection = np.where(fm['correlation_coefficients'][:] > CC_thres)[0]
                    if selection.size == 0:
                        continue
                    waves = fw['waveforms'][selection, :, :, :].astype(np.float32)
                else:
                    waves = fw['waveforms'][:, :, :, :]
            chunks.append(waves)
        # single concatenation instead of one np.vstack per file
        WAVEFORMS = np.concatenate(chunks, axis=0)
    if normRMS:
        # Per-trace standard deviation, computed for all traces at once.
        # Traces with a zero standard deviation are divided by 1, i.e. kept.
        norm = np.std(WAVEFORMS, axis=-1, keepdims=True)
        norm[norm == 0.] = 1.
        WAVEFORMS /= norm   # in place: a caller-supplied array is modified too
    filtered_waveforms = np.zeros((ns, nc, Nsamp), dtype=np.float32)
    for s in range(ns):
        for c in range(nc):
            filtered_waveforms[s, c, :] = np.sum(spectral_filtering(WAVEFORMS[:, s, c, :], SNR_thres=SNR_thres), axis=0)
    return filtered_waveforms
def SVDWF_multiplets_test(template_id, db_path=autodet.cfg.dbpath, db_path_M='matched_filter_2/', db_path_T='template_db_2/', WAVEFORMS=None, normRMS=True, Nsing_values=5, max_freq=autodet.cfg.max_freq, attach_raw_data=False):
    """Stack the best detections of one template after filtering them with SVDWF.

    The detections are taken from the catalog returned by
    ``autodet.db_h5py.read_catalog_multiplets``.  The best ones (highest
    correlation coefficients) are selected, sorted chronologically, read
    from their ``wav.h5`` files, normalized, and passed through ``SVDWF``
    (helper defined elsewhere; presumably SVD + Wiener filtering, judging by
    its name).  The filtered detections are averaged into one trace per
    (station, component).

    Parameters
    ----------
    template_id : int
        Template whose detections are processed.
    db_path, db_path_M, db_path_T : str, optional
        Database root, matched-filter sub-directory, template sub-directory.
    WAVEFORMS : optional
        Ignored: the waveforms are always read from the database.  The
        parameter is kept so that existing calls keep working.
    normRMS : bool, optional
        If True, divide every trace by its standard deviation; otherwise by
        its maximum absolute amplitude.  Traces whose norm is zero are left
        untouched.
    Nsing_values : int, optional
        Forwarded to ``SVDWF``.
    max_freq : float, optional
        Forwarded to ``SVDWF``.
    attach_raw_data : bool, optional
        If True, attach the unfiltered waveforms as ``S.raw_data``.

    Returns
    -------
    S : Stream or None
        One stacked trace per (station, component), each normalized by its
        maximum absolute amplitude.  ``S.data`` holds the filtered
        detections and ``S.Nmulti`` the number of detections used.  None is
        returned when no detection is available.
    """
    import glob
    from obspy import Stream, Trace
    #-----------------------------------------------------------------------------------------------
    T = autodet.db_h5py.read_template('template{:d}'.format(template_id), db_path=db_path+db_path_T)
    #-----------------------------------------------------------------------------------------------
    print("Looking for {}{:d}_*".format(db_path + db_path_M + '*multiplets', template_id))
    # glob replaces the former `ls` run through a shell: no subprocess is
    # left behind and the paths are never interpreted by a shell.
    files = sorted(glob.glob(db_path + db_path_M + '*multiplets{:d}_*'.format(template_id)))
    S = Stream()
    #------------- retrieve metadata ---------------
    Nsamp = 0
    ns = 0
    nc = 0
    for path in files:
        if path.endswith('wav.h5'):
            if Nsamp == 0:
                with h5.File(path, mode='r') as fw:
                    # only the shape is needed: do not load the dataset
                    Nsamp = fw['waveforms'].shape[-1]
        else:
            with h5.File(path, mode='r') as fm:
                if len(fm['origin_times']) != 0:
                    nc = len(fm['components'][()])
                    ns = len(fm['stations'][()])
                    S.stations = fm['stations'][()].astype('U').tolist()
                    S.components = fm['components'][()].astype('U').tolist()
                    S.latitude = T.metadata['latitude']
                    S.longitude = T.metadata['longitude']
                    S.depth = T.metadata['depth']
                    S.template_id = template_id
        if ns != 0 and Nsamp != 0:
            break
    else:
        # the file list was exhausted before both dimensions were known
        print("None multiplet for template {:d} !! Return None".format(template_id))
        return None
    #----------------------------------------------
    catalog = autodet.db_h5py.read_catalog_multiplets('multiplets{:d}'.format(template_id), db_path_M=db_path_M, db_path=db_path)
    CC = catalog['correlation_coefficients']
    best_detection_indexes = np.argsort(CC)[::-1]
    if CC.size > 300:
        best_detection_indexes = best_detection_indexes[:100]                        # the best 100 detections
    elif CC.size > 100:
        best_detection_indexes = best_detection_indexes[:int(30./100. * CC.size)]    # the best 30%
    elif CC.size > 50:
        best_detection_indexes = best_detection_indexes[:int(50./100. * CC.size)]    # the best 50%
    elif CC.size > 10:  # was `CC>size > 10`, which raised NameError
        best_detection_indexes = best_detection_indexes[:int(66./100. * CC.size)]    # the best 66%
    # else: keep all detections
    # reorder by chronological order
    best_detection_indexes = best_detection_indexes[np.argsort(catalog['origin_times'][best_detection_indexes])]
    n_events = best_detection_indexes.size
    if n_events == 0:
        print("None multiplet for template {:d} !! Return None".format(template_id))
        return None
    # get the waveforms
    WAVEFORMS = np.zeros((n_events, ns, nc, Nsamp), dtype=np.float32)
    current_name = None
    fw = None
    try:
        for n, det in enumerate(best_detection_indexes):
            name = db_path + db_path_M + catalog['filenames'][det].decode('utf-8')
            # Compare against the file that is actually open.  Comparing
            # against the first file name, as before, read from the wrong
            # file whenever the sequence came back to the first file.
            if name != current_name:
                if fw is not None:
                    fw.close()
                    fw = None
                fw = h5.File(name + 'wav.h5', mode='r')
                current_name = name
            WAVEFORMS[n, :, :, :] = fw['waveforms'][catalog['indices'][det], :, :, :]
    finally:
        # the last file used to be left open
        if fw is not None:
            fw.close()
    # normalization, for all traces at once
    if normRMS:
        norm = np.std(WAVEFORMS, axis=-1, keepdims=True)
    else:
        norm = np.abs(WAVEFORMS).max(axis=-1, keepdims=True)
    norm[norm == 0.] = 1.   # traces with a zero norm are left untouched
    WAVEFORMS /= norm
    filtered_data = np.zeros((n_events, ns, nc, Nsamp), dtype=np.float32)
    for s in range(ns):
        for c in range(nc):
            filtered_data[:,s,c,:] = SVDWF(WAVEFORMS[:,s,c,:], Nsing_values, max_freq=max_freq)
            mean = np.mean(filtered_data[:,s,c,:], axis=0)
            peak = np.abs(mean).max()
            if peak != 0.:
                # skip the normalization of dead channels instead of
                # producing a trace full of NaNs
                mean /= peak
            trace = Trace(data=mean)
            trace.stats.station = S.stations[s]
            trace.stats.channel = S.components[c]
            trace.stats.sampling_rate = autodet.cfg.sampling_rate
            S += trace
    S.data = filtered_data
    if attach_raw_data:
        S.raw_data = WAVEFORMS
    S.Nmulti = n_events
    return S