# imports assumed by this snippet (h5py, numpy, Keras, and the give_time
# alias used throughout these codes)
import h5py as h5
import numpy as np
from keras import models
from keras.models import Model
from time import time as give_time

path_classification = './classification/'
path_template_db = './template_event_database/'

# load the trained classifier
classifier = models.load_model(path_classification + 'classifier.h5')

# load the features extracted from the detections
features_dataset = {}
with h5.File(path_classification + 'features_dataset.h5', mode='r') as f:
    for item in list(f.keys()):
        features_dataset[item] = f[item][()]
features = features_dataset['features']
n_detections = features.shape[0]
features = features.reshape(n_detections, -1)

# classify the detections
t1 = give_time()
predictions = classifier.predict(features)
t2 = give_time()
print("{:.2f}sec to classify the {:d} detections".format(
    t2 - t1, n_detections))

# keep the detections whose first-class probability exceeds 0.5
I = np.where(predictions[:, 0] > 0.5)[0]

# build a model that outputs the activations of an intermediate layer
layer_name = classifier.layers[1].name
intermediate_layer_model = Model(
    inputs=classifier.input,
    outputs=classifier.get_layer(layer_name).output)
intermediate_output = intermediate_layer_model.predict(features[I, :])
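# The intermediate-layer activations extracted above can be saved alongside
# the classifier outputs for later inspection (e.g., clustering of the
# detections). A minimal sketch in the same h5py style as the rest of this
# script; the file name 'hidden_features.h5' and the dataset names are
# hypothetical, not part of the original pipeline.
with h5.File(path_classification + 'hidden_features.h5', mode='w') as f:
    f.create_dataset('detection_indexes', data=I)
    f.create_dataset('hidden_features', data=intermediate_output)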
def compute_envelopes(traces):
    start = give_time()
    traces = envelope_parallel(traces)  # take the upper envelope of the traces
    end = give_time()
    print('Computed the envelopes in {:.2f}sec.'.format(end - start))
    return traces
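# For reference, a serial equivalent of what `envelope_parallel` is assumed
# to compute on each trace: the upper envelope as the modulus of the analytic
# (Hilbert-transformed) signal. A minimal sketch under that assumption;
# `envelope_serial` is a hypothetical name.
from scipy.signal import hilbert

def envelope_serial(traces):
    # traces: (n_stations, n_components, n_samples) array
    return np.abs(hilbert(traces, axis=-1))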
def calc_network_response(data, moveouts, phase, device='gpu',
                          n_closest_stations=None, envelopes=True,
                          test_points=None, saturation=False):
    """Compute the network response of the specified stations and components
    with the given phase and moveouts.

    Can run on GPUs if `device='gpu'`. If `n_closest_stations` is not None,
    only the stations closest to each grid point are used in the computation
    of the network response.
    """
    # threshold used to determine whether a trace is garbage or not;
    # its value depends on the unit of the trace
    ANOMALY_THRESHOLD = 1.e-12
    stations = data['metadata']['stations']
    components = data['metadata']['components']
    if isinstance(stations, str):
        stations = [stations]
    if isinstance(components, str):
        components = [components]
    traces = np.array(data['waveforms'], copy=True)
    # -----------------------------
    n_stations = traces.shape[0]
    n_components = traces.shape[1]
    n_samples = traces.shape[2]
    # -----------------------------
    # initialize the network response object
    network_response = NetworkResponse(stations, components)
    if phase in ('p', 'P'):
        print('Use the P-wave moveouts to compute the Composite Network Response')
        moveout = moveouts.p_relative_samp
    elif phase in ('s', 'S'):
        print('Use the S-wave moveouts to compute the Composite Network Response')
        moveout = moveouts.s_relative_samp
    elif phase in ('sp', 'SP'):
        print('Use the P- and S-wave moveouts to compute the Composite Network Response')
        moveoutS = moveouts.s_relative_p_samp
        moveoutP = moveouts.p_relative_samp
    smooth_win = cmn.to_samples(cfg.smooth, data['metadata']['sampling_rate'])
    data_availability = np.zeros(n_stations, dtype=np.int32)
    if envelopes:
        window_length = cmn.to_samples(cfg.template_len,
                                       data['metadata']['sampling_rate'])
        start = give_time()
        # take the upper envelope of the traces
        detection_traces = envelope_parallel(traces)
        end = give_time()
        print('Computed the envelopes in {:.2f}sec.'.format(end - start))
        for s in range(n_stations):
            for c in range(n_components):
                missing_samples = detection_traces[s, c, :] == 0.
                if np.sum(missing_samples) > detection_traces.shape[-1] / 2:
                    # more than half of the trace is missing: skip it
                    continue
                median = np.median(detection_traces[s, c, ~missing_samples])
                mad = cmn.mad(detection_traces[s, c, ~missing_samples])
                if mad < ANOMALY_THRESHOLD:
                    # the trace is anomalously flat: skip it
                    continue
                # center and scale the envelope with robust statistics
                detection_traces[s, c, :] = \
                    (detection_traces[s, c, :] - median) / mad
                detection_traces[s, c, missing_samples] = 0.
                data_availability[s] += 1
    else:
        # compute the daily MADs (Median Absolute Deviation) to normalize the
        # traces; this is an empirical way of correcting for the instrument's
        # sensitivity
        MADs = np.zeros((n_stations, n_components), dtype=np.float32)
        for s in range(n_stations):
            for c in range(n_components):
                traces[s, c, :] -= np.median(traces[s, c, :])
                mad = cmn.mad(traces[s, c, :])
                MADs[s, c] = np.float32(mad)
                if MADs[s, c] != 0.:
                    traces[s, c, :] /= MADs[s, c]
                    data_availability[s] += 1
        detection_traces = np.square(traces)
    # we consider a station to be available if more than one of its channels
    # was operational
    data_availability = data_availability > 1
    network_response.data_availability = data_availability
    print('{:d} / {:d} available stations'.format(data_availability.sum(),
                                                  data_availability.size))
    if data_availability.sum() < data_availability.size // 2:
        print('Less than half the stations are available, pass!')
        network_response.success = False
        return network_response
    else:
        network_response.success = True
    if n_closest_stations is not None:
        moveouts.get_closest_stations(data_availability, n_closest_stations)
        print('Compute the beamformed network response only with the closest '
              'stations to each test seismic source')
    else:
        moveouts.closest_stations_indexes = None
    if saturation:
        print('Saturate the high amplitudes with a hyperbolic tangent.')
        for s in range(n_stations):
            for c in range(n_components):
                # use a non-linear function that saturates beyond some
                # threshold; tanh saturates after x = 1 (tanh(1.) = 0.76,
                # tanh(+inf) = 1.) and behaves like the identity around 0
                saturation_factor = np.percentile(detection_traces[s, c, :],
                                                  95.00)
                if saturation_factor != 0.:
                    detection_traces[s, c, :] = \
                        np.tanh(detection_traces[s, c, :] / saturation_factor) \
                        * (saturation_factor / (np.pi / 2.))
    #traces = traces.squeeze()
    if phase in ('sp', 'SP'):
        composite, where = clib.network_response_SP(
            np.mean(detection_traces[:, :-1, :], axis=1),
            detection_traces[:, -1, :],
            moveoutP,
            moveoutS,
            smooth_win,
            device=device,
            closest_stations=moveouts.closest_stations_indexes,
            test_points=test_points)
        network_response.sp = True
    else:
        composite, where = clib.network_response(
            traces[:, 0, :],  # north components
            traces[:, 1, :],  # east components
            moveouts.cosine_azimuths,
            moveouts.sine_azimuths,
            moveout,
            smooth_win,
            device=device,
            closest_stations=moveouts.closest_stations_indexes,
            test_points=test_points)
        network_response.sp = False
    network_response.raw_composite = np.array(composite, copy=True)
    # remove the baseline of the composite network response
    window = np.int32(2. * 60. * cfg.sampling_rate)
    composite -= baseline(composite, window)
    smoothed = gaussian_filter1d(composite, np.int32(5. * cfg.sampling_rate))
    network_response.composite = composite
    network_response.where = where
    network_response.smoothed = smoothed
    return network_response
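# `cmn.mad` above is assumed to be the usual median absolute deviation,
# MAD(x) = median(|x - median(x)|), a robust alternative to the standard
# deviation. A minimal numpy sketch of that assumption (`mad_sketch` is a
# hypothetical name, not the actual cmn implementation):
def mad_sketch(x):
    return np.median(np.abs(x - np.median(x)))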
def test_matched_filter(n_templates=1, n_stations=1, n_components=1,
                        template_duration=10, data_duration=86400,
                        sampling_rate=100, step=1, arch='cpu',
                        check_zeros='first', normalize='short'):
    """Test the `matched_filter` function.

    Generate random data, templates, and moveouts, and run a matched-filter
    search. The templates are sliced from the data, therefore the maximum
    correlation coefficient should always be one if the program ran normally.
    Try `normalize='full'` and/or `arch='precise'` or `'gpu'` to achieve
    better numerical precision.

    Parameters
    ----------
    n_templates: scalar, int, optional
        Number of synthetic templates. Defaults to 1.
    n_stations: scalar, int, optional
        Number of stations. Defaults to 1.
    n_components: scalar, int, optional
        Number of components/channels. Defaults to 1.
    template_duration: scalar, float, optional
        Duration, in seconds, of the template waveforms. Defaults to 10s.
    data_duration: scalar, float, optional
        Duration, in seconds, of the data waveforms. Defaults to 86400s.
    sampling_rate: scalar, float, optional
        Sampling frequency (Hz) of the waveforms. Defaults to 100Hz.
    step: scalar, int
        Time interval, in samples, between consecutive correlations.
    arch: string, optional
        One of `'cpu'`, `'precise'` or `'gpu'`. The `'precise'` implementation
        is a CPU implementation that is slower but more accurate than `'cpu'`.
        The GPU implementation is used if `arch='gpu'`. Default is `'cpu'`.
    check_zeros: string, optional
        Controls the verbosity level at the end of this routine when checking
        for zeros in the time series of correlation coefficients (CCs).
        - False: No messages.
        - `'first'`: Check zeros on the first template's CCs (recommended).
        - `'all'`: Check zeros on each template's CCs. It can be useful for
          troubleshooting but in general this prints too many messages.
        Default is `'first'`.
    normalize: string, optional
        Either `'short'` or `'full'`; `'full'` is slower but removes the mean
        of the data at every correlation. `'short'` is the original
        implementation. NB: when using `normalize='short'`, the templates and
        the data sliding windows must have zero means (high-pass filter the
        data if necessary).

    Returns
    -------
    templates: numpy.ndarray
        (n_templates, n_stations, n_components, n_tp_samples) `numpy.ndarray`
        with the random template waveforms generated by the function.
    moveouts: numpy.ndarray
        (n_templates, n_stations, n_components) `numpy.ndarray` with the
        random moveouts generated by the function.
    data: numpy.ndarray
        (n_stations, n_components, n_samples) `numpy.ndarray` with the random
        data generated by the function.
    step: scalar, int
        Time interval, in samples, between consecutive correlations.
    cc_sums: numpy.ndarray, float
        2D (n_templates, n_correlations) `numpy.ndarray`. The number of
        correlations is controlled by `step`.
""" from time import time as give_time template_times = np.random.random_sample(n_templates) * (data_duration / 2) # if step is not 1, not very likely that random times will be found if step != 1: template_times = np.round(template_times / (step / sampling_rate)) * (step / sampling_rate) # determines how many templates there are min_moveout = 0 max_moveout = 10 moveouts = np.zeros((n_templates, n_stations, n_components)) for t in range(n_templates): for s in range(n_stations): moveouts[t, s, :] = (np.random.random_sample(n_components) * (max_moveout - min_moveout)) + min_moveout moveouts = np.round(moveouts * sampling_rate) # generate data n_samples_data = data_duration * sampling_rate if float(int(n_samples_data)) == float(n_samples_data): n_samples_data = np.int32(n_samples_data) else: print('The data duration times the sampling rate yields a non-integer number of samples !') print('Adjust your input parameters so that this product is an integer.') return data = np.random.random_sample((n_stations, n_components, n_samples_data)) for s in range(n_stations): for c in range(n_components): data[s, c, :] = data[s, c, :] - np.mean(data[s, c, :]) # generate templates from data n_samples_template = template_duration * sampling_rate if float(int(n_samples_template)) == float(n_samples_template): n_samples_template = np.int32(n_samples_template) else: print('The template duration times the sampling rate yields a non-integer number of samples !') print('Adjust your input parameters so that this product is an integer.') return n_templates = template_times.size templates = np.zeros((n_templates, n_stations, n_components, n_samples_template)) for t in range(n_templates): start_t = template_times[t] * sampling_rate template = np.zeros((n_stations, n_components, n_samples_template)) for s in range(n_stations): for c in range(n_components): start = int(start_t + np.round(moveouts[t, s, c])) stop = int(start_t + n_samples_template + np.round(moveouts[t, s, c])) template[s, c, :n_samples_template] = data[s, c, start:stop] templates[t, :, :, :n_samples_template] = template weights = np.ones((n_templates, n_stations, n_components)) / (n_stations * n_components) start_time = give_time() cc_sum = matched_filter(templates, moveouts, weights, data, step, arch=arch, check_zeros=check_zeros, normalize=normalize) stop_time = give_time() print("Matched filter ({}) for {} templates on {} stations/{} " "components over {} samples ({} step) ran in {:.3f}s". format(arch, n_templates, n_stations, n_components, n_samples_data, step, (stop_time - start_time))) return templates, moveouts, data, step, cc_sum
def SVDWF_multiplets_bulk(template_id, db_path=autodet.cfg.dbpath,
                          db_path_M='matched_filter_1/',
                          db_path_T='template_db_1/',
                          WAVEFORMS=None, best=False, normRMS=True,
                          n_singular_values=5, max_freq=autodet.cfg.max_freq,
                          attach_raw_data=False):
    from obspy import Stream, Trace
    from scipy.linalg import svd
    from scipy.signal import wiener
    # -----------------------------------------------------------------------
    T = autodet.db_h5py.read_template('template{:d}'.format(template_id),
                                      db_path=db_path + db_path_T)
    # -----------------------------------------------------------------------
    files_all = glob.glob(db_path + db_path_M + '*multiplets_*meta.h5')
    files = []
    # ------------------------------
    S = Stream()
    CC = []
    tid_str = str(template_id)
    t1 = give_time()
    for file in files_all:
        with h5.File(file, mode='r') as f:
            if tid_str in f.keys():
                files.append(file[:-len('meta.h5')])
                CC.extend(f[tid_str]['correlation_coefficients'][()].tolist())
    CC = np.float32(CC)
    t2 = give_time()
    print('{:.2f} s to retrieve the correlation coefficients.'.format(t2 - t1))
    if len(files) == 0:
        print('No multiplets for template {:d}! Returning None.'.
              format(template_id))
        return None
    with h5.File(files[0] + 'meta.h5', mode='r') as f:
        S.stations = f[tid_str]['stations'][()].astype('U').tolist()
        S.components = f[tid_str]['components'][()].astype('U').tolist()
    ns = len(S.stations)
    nc = len(S.components)
    S.latitude = T.metadata['latitude']
    S.longitude = T.metadata['longitude']
    S.depth = T.metadata['depth']
    # ------------------------------
    if WAVEFORMS is None:
        # select the detections to stack based on their correlation
        # coefficients
        CC = np.sort(CC)
        if len(CC) > 300:
            CC_thres = CC[-101]  # the best 100 detections
        elif len(CC) > 70:
            CC_thres = CC[int(7. / 10. * len(CC))]  # the best 30%
        elif len(CC) > 30:
            CC_thres = np.median(CC)  # the best 50%
        elif len(CC) > 10:
            CC_thres = np.percentile(CC, 33.)  # the best 66% detections
        else:
            CC_thres = 0.
        Nstack = np.zeros((ns, nc), dtype=np.float32)
        WAVEFORMS = []
        Nmulti = 0
        t1 = give_time()
        for file in files:
            if best:
                with h5.File(file + 'meta.h5', mode='r') as fm:
                    selection = np.where(
                        fm[tid_str]['correlation_coefficients'][:]
                        > CC_thres)[0]
                if selection.size == 0:
                    continue
                with h5.File(file + 'wav.h5', mode='r') as fw:
                    WAVEFORMS.append(
                        fw[tid_str]['waveforms'][selection, :, :, :])
            else:
                with h5.File(file + 'wav.h5', mode='r') as fw:
                    WAVEFORMS.append(fw[tid_str]['waveforms'][()])
            Nmulti += WAVEFORMS[-1].shape[0]
            for m in range(WAVEFORMS[-1].shape[0]):
                for s in range(ns):
                    for c in range(nc):
                        if normRMS:
                            # normalize by the RMS amplitude
                            norm = np.sqrt(np.var(WAVEFORMS[-1][m, s, c, :]))
                        else:
                            norm = 1.
                        if norm != 0.:
                            WAVEFORMS[-1][m, s, c, :] /= norm
        t2 = give_time()
        print('{:.2f} s to retrieve the waveforms.'.format(t2 - t1))
    else:
        # waveforms were provided by the caller
        Nmulti = WAVEFORMS.shape[0]
        if normRMS:
            for m in range(WAVEFORMS.shape[0]):
                for s in range(ns):
                    for c in range(nc):
                        norm = np.sqrt(np.var(WAVEFORMS[m, s, c, :]))
                        if norm != 0.:
                            WAVEFORMS[m, s, c, :] /= norm
    WAVEFORMS = np.vstack(WAVEFORMS)
    WAVEFORMS = WAVEFORMS.reshape(-1, ns, nc, WAVEFORMS.shape[-1])
    print(WAVEFORMS.shape)
    filtered_data = np.zeros_like(WAVEFORMS)
    for s in range(ns):
        for c in range(nc):
            filtered_data[:, s, c, :] = SVDWF(WAVEFORMS[:, s, c, :],
                                              n_singular_values,
                                              max_freq=max_freq)
            #filtered_data[:, s, c, :] = spectral_filtering(WAVEFORMS[:, s, c, :], SNR_thres=5., max_freq=max_freq)
            mean = np.mean(filtered_data[:, s, c, :], axis=0)
            mean /= np.abs(mean).max()
            S += Trace(data=mean)
            S[-1].stats.station = S.stations[s]
            S[-1].stats.channel = S.components[c]
            S[-1].stats.sampling_rate = autodet.cfg.sampling_rate
    S.data = filtered_data
    if attach_raw_data:
        S.raw_data = WAVEFORMS
    S.Nmulti = Nmulti
    return S
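# `SVDWF` is assumed to be an SVD-based Wiener filter: the (n_detections,
# n_samples) waveform matrix is approximated by its first n singular
# components, and a Wiener filter then smooths the low-rank reconstruction.
# A minimal sketch under that assumption; the actual implementation may also
# low-pass the result below `max_freq`, which this sketch omits.
def SVDWF_sketch(waveforms, n_singular_values):
    from scipy.linalg import svd
    from scipy.signal import wiener
    U, sv, Vt = svd(waveforms, full_matrices=False)
    n = min(n_singular_values, sv.size)
    # rank-n reconstruction of the waveform matrix
    low_rank = (U[:, :n] * sv[:n]) @ Vt[:n, :]
    # Wiener-filter each reconstructed waveform
    return np.apply_along_axis(wiener, -1, low_rank)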
# if you have Nvidia GPUs:
device = 'gpu'
# else:
#device = 'cpu'
print('The codes will run on {}'.format(device))
print('If this was not your intention, edit this script and uncomment the '
      'right line for the variable "device".')

# whether or not to use the P-wave moveouts to align the vertical traces
#method = 'S'
method = 'SP'

filename = 'subgrid_downsampled'
t1 = give_time()
if method == 'S':
    MV = autodet.moveouts.MV_object(filename, net,
                                    relative=True,
                                    remove_airquakes=True)
elif method == 'SP':
    MV = autodet.moveouts.MV_object(filename, net,
                                    relativeSP=True,
                                    remove_airquakes=True)
t2 = give_time()
print('{:.2f}sec to load the moveouts.'.format(t2 - t1))

# create a vector with indexes for every potential seismic source
test_points = np.arange(MV.n_sources, dtype=np.int32)
# remove the airquakes by removing the corresponding indexes
test_points = test_points[MV.idx_EQ]

# used to know where to get the data if folders with different frequency
# bands exist (not relevant for this example)
band = [1., 12.]
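# The moveouts and restricted test points feed directly into the network-
# response computation. A sketch of that call, assuming `data` holds one day
# of preprocessed waveforms loaded elsewhere in the script and that
# `n_closest_stations=20` suits this network (both are assumptions):
#cnr = calc_network_response(data, MV, method, device=device,
#                            n_closest_stations=20, envelopes=True,
#                            test_points=test_points)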