def plot_template_event(idx):
    filename = 'detections_{}_'.format(
        udt(features_dataset['days'][idx]).strftime('%Y%m%d'))
    index = features_dataset['indexes'][idx]
    #----------------------------
    with h5.File(autodet.cfg.dbpath + filename + 'wav.h5', mode='r') as f:
        waveforms_ = f['waveforms'][index, :, :, :]
    with h5.File(autodet.cfg.dbpath + filename + 'meta.h5', mode='r') as f:
        stations_ = f['stations'][index, :]
        components_ = f['components'][()]
        moveouts_ = np.int32(f['moveouts'][index, :, :] * autodet.cfg.sampling_rate)
        location_ = f['locations'][index, :]
        origin_t_ = udt(f['origin_times'][index])
    n_stations = len(stations_)
    n_components = len(components_)
    t_min = 0
    t_max = moveouts_.max() + waveforms_.shape[-1]
    fig = plt.figure('detection_{:d}'.format(idx), figsize=(27, 17))
    plt.suptitle('Detection on {} from {:.2f}|{:.2f}|{:.2f}km'.format(
        origin_t_.strftime('%Y,%m,%d %H:%M:%S'),
        location_[0], location_[1], location_[2]))
    for s in range(n_stations):
        for c in range(n_components):
            plt.subplot(n_stations, n_components, s * n_components + c + 1)
            time = np.arange(waveforms_.shape[-1]) + moveouts_[s, c]
            plt.plot(time, waveforms_[s, c, :],
                     label='{}.{}'.format(stations_[s].astype('U'),
                                          components_[c].astype('U')))
            plt.xlim(t_min, t_max)
            plt.legend(loc='upper left', frameon=False)
    plt.subplots_adjust(top=0.94, bottom=0.08, left=0.05, right=0.95)
    plt.show()
def ReadData(date, band):
    """
    ReadData(date, band)

    Reads one day of preprocessed continuous waveforms, together with their
    metadata, from the HDF5 archive of the requested frequency band.
    """
    date = udt(date)
    filename = 'data_{:d}_{:d}/data_{}.h5'.format(int(band[0]), int(band[1]),
                                                  date.strftime('%Y%m%d'))
    data = {}
    data.update({'metadata': {}})
    with h5.File(cfg.input_path + filename, mode='r') as f:
        data.update({'waveforms': f['waveforms'][()]})
        for item in f['metadata'].keys():
            if len(f['metadata'][item][()]) == 1:
                data['metadata'].update({item: f['metadata'][item][()][0]})
            else:
                data['metadata'].update({item: f['metadata'][item][()]})
    data['metadata'].update({'date': udt(data['metadata']['date'])})
    for item in ['stations', 'components']:
        data['metadata'][item] = data['metadata'][item].astype('U')
    return data
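def _example_read_one_day():
    # Hedged usage sketch of ReadData(): the '2013,03,17' date and the
    # [1, 12] Hz band are assumptions taken from the rest of this example and
    # require the corresponding data_1_12/data_20130317.h5 archive to exist
    # under cfg.input_path.
    data = ReadData('2013,03,17', [1., 12.])
    print('Stations:', data['metadata']['stations'])
    # the continuous waveforms are stored as a
    # (n_stations, n_components, n_samples) float32 array
    print('Waveform array shape:', data['waveforms'].shape)
    return data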
def plot_composite_network_response(composite):
    """
    Plots the composite network response, shows which peaks are detections
    and the time-varying threshold.
    """
    plt.figure('composite_network_response')
    ax1 = plt.subplot2grid((1, 4), (0, 0))
    _, _, _ = plt.hist(composite, bins=100)
    mad = np.median(np.abs(composite - np.median(composite)))
    threshold1 = np.median(composite) + autodet.cfg.ratio_threshold * mad
    plt.axvline(threshold1, lw=2, ls='--', color='k',
                label=r'Global $\mathrm{{median}}\ + {:d} \times \mathrm{{MAD}}$'.format(
                    int(autodet.cfg.ratio_threshold)))
    plt.legend(loc='upper right', fancybox=True)
    plt.xlabel('Composite Network Response')
    plt.ylabel('Frequency')
    #--------------------------------------------------
    ax2 = plt.subplot2grid((1, 4), (0, 1), colspan=3)
    plt.plot(composite)
    n_detections = detections['origin_times'].size
    T0 = udt(udt(detections['origin_times'][0]).strftime('%Y,%m,%d')).timestamp
    for d in range(n_detections - 1):
        idx = np.int32((detections['origin_times'][d] - T0) * autodet.cfg.sampling_rate)
        plt.plot(idx, detections['composite_network_response'][d],
                 ls='', marker='o', color='C3')
    idx = np.int32((detections['origin_times'][n_detections - 1] - T0)
                   * autodet.cfg.sampling_rate)
    plt.plot(idx, detections['composite_network_response'][n_detections - 1],
             ls='', marker='o', color='C3', label='Candidate Template Events')
    threshold2 = autodet.template_search.time_dependent_threshold(
        composite, np.int32(0.5 * 3600. * autodet.cfg.sampling_rate))
    plt.plot(threshold2, color='C5', ls='--', lw=2,
             label=r'Sliding $\mathrm{{median}}\ + {:d} \times \mathrm{{MAD}}$'.format(
                 int(autodet.cfg.ratio_threshold)))
    xticks = np.arange(0, composite.size + 1,
                       np.int32(2. * 3600. * autodet.cfg.sampling_rate))
    plt.legend(loc='upper right', fancybox=True)
    xtick_labels = [udt(X / autodet.cfg.sampling_rate).strftime('%H:%M:%S')
                    for X in xticks]
    plt.xticks(xticks, xtick_labels)
    plt.xlim(0, composite.size + 1)
    plt.grid(axis='x')
    plt.xlabel('Time')
    plt.ylabel('Composite Network Response')
    #----------------------------
    plt.subplots_adjust(top=0.88, bottom=0.5, left=0.07, right=0.97,
                        hspace=0.245, wspace=0.275)
    plt.show()
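def _sliding_mad_threshold_sketch(cnr, window, ratio):
    # Hedged illustration only: this is NOT autodet.template_search's actual
    # time_dependent_threshold() implementation, just a minimal sketch of the
    # idea plotted above. In each non-overlapping window of 'window' samples,
    # the threshold is median + ratio * MAD of the composite network response
    # 'cnr', and that value is held constant over the window.
    n_windows = int(np.ceil(cnr.size / float(window)))
    threshold = np.zeros(cnr.size, dtype=np.float32)
    for w in range(n_windows):
        chunk = cnr[w * window:(w + 1) * window]
        med = np.median(chunk)
        mad = np.median(np.abs(chunk - med))
        threshold[w * window:(w + 1) * window] = med + ratio * mad
    return threshold

# e.g., with the same parameters as in the plot above:
# threshold_sketch = _sliding_mad_threshold_sketch(
#     composite, np.int32(0.5 * 3600. * autodet.cfg.sampling_rate),
#     autodet.cfg.ratio_threshold)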
def find_multiplets(templates_mat, moveouts_mat, data, template_ids, net,
                    threshold_type='rms', weights_mat=None, buf=True,
                    device='gpu', template_refinement=False):
    """
    Find repetitions of the template waveforms, i.e. multiplets

    Parameters
    ----------
    templates_mat : (n_templates, n_stations, n_components, n_samples_template) array_like
        The template waveforms (float 32)
    moveouts_mat : (n_templates, n_stations, n_components) array_like
        The moveouts (int 32)
    data : dictionary
        Dictionary as returned by ReadData, with the continuous waveforms in
        data['waveforms'] (a (n_stations, n_components, n_samples_continuous_data)
        float 32 array) and the day's metadata in data['metadata'].
    template_ids : (n_templates) array_like
        The template indexes (int 32)
    net : Network object from the dataset module
    threshold_type : string, optional
        Default is 'rms', the other option is 'mad'. Determines whether the
        detection threshold uses the rms or the mad of the correlation
        coefficient time series.
    weights_mat : (n_templates, n_stations, n_components) array_like, optional
        Default is None, which assigns the same weight to all stations /
        components. The user can give a float 32 array of weights that will
        be used to calculate weighted averaged correlation coefficients.
    buf : bool, optional
        Default is True, which removes detections occurring in the data buffer.
    device : string, optional
        Default is 'gpu'. Determines whether Fast Matched Filter (FMF) runs on
        GPUs or CPUs (when 'cpu' is given instead).
    template_refinement : bool, optional
        Default is False. If True, limits the number of detections to
        n_to_keep=50 per template. This choice reduces the time spent writing
        data and the size of the output. It is meant to be used during
        intermediate matched filter searches that aim at improving the quality
        of the templates by stacking the detections.

    Returns
    -------
    list_metadata : (n_templates) list
        List containing n_templates dictionaries with metadata
    list_waveforms : (n_templates) list
        List containing n_templates (n_stations, n_components,
        n_samples_extracted) arrays storing the waveforms of the newly
        detected events.
    cc_sums : (n_templates, n_correlations) array_like
        Summed correlation coefficients output by FMF.
    """
    if template_refinement:
        n_to_keep = 50
    threshold_type = threshold_type.lower()
    nt, ns, nc, Nsamp = templates_mat.shape
    step = np.int32(cmn.to_samples(cfg.matched_filter_step,
                                   data['metadata']['sampling_rate']))
    n_stations = data['waveforms'].shape[0]
    n_components = data['waveforms'].shape[1]
    n_samples_data = data['waveforms'].shape[2]
    n_samples = np.int32(cfg.multiplet_len * data['metadata']['sampling_rate'])
    # select 10s before the detection
    buffer_extracted_events = cfg.buffer_extracted_events
    if weights_mat is None:
        # moveouts are integers, so build the uniform weights as float32
        # before normalizing them per template
        weights_mat = np.ones(moveouts_mat.shape, dtype=np.float32)
        for n in range(weights_mat.shape[0]):
            weights_mat[n, :] /= weights_mat[n, :].sum()
    CC_SUMS = []
    Nparts = 2
    L = ns // Nparts + 1
    for i in range(Nparts):
        # to be memory friendly, we subdivide the network into Nparts
        # and the resulting correlation coefficients are then manually stacked
        # in a separate loop
        id1 = i * L
        id2 = (i + 1) * L
        if id2 > ns:
            id2 = ns
        cc_sums = fmf.matched_filter(templates_mat[:, id1:id2, :, :],
                                     moveouts_mat[:, id1:id2],
                                     weights_mat[:, id1:id2, :],
                                     data['waveforms'][id1:id2, :, :],
                                     step,
                                     arch=device)
        CC_SUMS.append(cc_sums)
    cc_sums = CC_SUMS[0]
    for i in range(1, Nparts):
        # stack the correlation coefficients
        cc_sums += CC_SUMS[i]
    cc_sums[np.isnan(cc_sums)] = 0

    list_metadata = []
    list_waveforms = []
    for i in range(nt):
        cc_sum = cc_sums[i, :]
        if threshold_type == 'rms':
            cc_sum -= np.mean(cc_sum)
            threshold = cfg.matched_filter_threshold * cmn.rms(cc_sum)
        elif threshold_type == 'mad':
            cc_sum -= np.median(cc_sum)
            threshold = cfg.matched_filter_threshold * cmn.mad(cc_sum)
        # ------------------
        cc_idx = np.argwhere(cc_sum > threshold)
        detections = cc_idx * step
        if buf:
            # remove detections from buffer
            limit = np.int32(cfg.data_buffer * data['metadata']['sampling_rate'])
            idx = detections >= limit
            cc_idx = cc_idx[idx]
            detections = detections[idx]
            limit = np.int32((86400 + cfg.data_buffer)
                             * data['metadata']['sampling_rate'])
            idx = detections < limit
            cc_idx = cc_idx[idx]
            detections = detections[idx]
        # only keep highest correlation coefficient for grouped detections
        # we assume the last component is the vertical component
        d_mv = moveouts_mat[i, :, 0] - moveouts_mat[i, :, -1]
        # fix the maximum window size to 3 times the template duration
        # fix the minimum window size to 1 time the template duration
        # in between: choose an adaptive size based on the median P-S time
        search_win = min(np.int32(3. * cfg.template_len * cfg.sampling_rate / step),
                         max(np.int32(1. * np.median(d_mv[d_mv != 0]) / step),
                             np.int32(cfg.template_len * cfg.sampling_rate / step)))
        for j in range(cc_idx.size):
            idx = np.arange(max(0, cc_idx[j] - search_win // 2),
                            min(cc_sum.size - 1, cc_idx[j] + search_win // 2),
                            dtype=np.int32)
            idx_to_update = np.where(cc_idx == cc_idx[j])[0]
            cc_idx[idx_to_update] = np.argmax(cc_sum[idx]) + idx[0]
        cc_idx = np.unique(cc_idx)
        detections = cc_idx * step
        # after this step, we can still have detections closer than search_win / 2
        cc_idx = list(cc_idx)
        Nrm = 0
        for j in range(1, detections.size):
            if (cc_idx[j - Nrm] - cc_idx[j - Nrm - 1]) < search_win // 2:
                if cc_sum[cc_idx[j - Nrm]] > cc_sum[cc_idx[j - Nrm - 1]]:
                    cc_idx.remove(cc_idx[j - Nrm - 1])
                else:
                    cc_idx.remove(cc_idx[j - Nrm])
                Nrm += 1
        cc_idx = np.asarray(cc_idx)
        detections = cc_idx * step
        n_multiplets = len(detections)
        # ------------------------------------------------------
        metadata_events = {}
        waveforms_events = {}
        origin_times = np.zeros(n_multiplets, dtype=np.float64)
        correlation_coefficients = np.zeros(n_multiplets, dtype=np.float32)
        waveforms = np.zeros((n_multiplets, n_stations, n_components, n_samples),
                             dtype=np.float32)
        idx_min = 0               # can't extract continuous data before index 0
        idx_max = n_samples_data  # can't extract continuous data after
                                  # the last sample of the day
        for d in range(n_multiplets):
            origin_time = udt(data['metadata']['date']) \
                + detections[d] / cfg.sampling_rate
            origin_times[d] = origin_time.timestamp \
                - buffer_extracted_events \
                - cfg.data_buffer
            correlation_coefficients[d] = cc_sum[cc_idx[d]]
            # -----------------------------------------
            # take care of not selecting out-of-bound indexes:
            id1 = detections[d] - np.int32(buffer_extracted_events * cfg.sampling_rate)
            if id1 < idx_min:
                # will have to zero-pad the beginning of the extracted sequence
                dn_b = idx_min - id1
                id2 = np.int32(id1 + n_samples)
                id1 = np.int32(idx_min)
            else:
                dn_b = 0
                id2 = id1 + n_samples
            if id2 > idx_max:
                # will have to zero-pad the end of the extracted sequence
                dn_e = id2 - idx_max
                id2 = np.int32(idx_max)
            else:
                dn_e = 0
            waveforms[d, :, :, :] = np.concatenate(
                (np.zeros((n_stations, n_components, dn_b), dtype=np.float32),
                 data['waveforms'][:, :, id1:id2],
                 np.zeros((n_stations, n_components, dn_e), dtype=np.float32)),
                axis=-1)
        # -----------------------------------------
        if template_refinement and origin_times.size > n_to_keep:
            # only keep the n_to_keep best detections
            threshold_CC = np.sort(correlation_coefficients)[-n_to_keep]
            detections_to_keep = np.where(correlation_coefficients >= threshold_CC)[0]
        else:
            detections_to_keep = np.arange(origin_times.size)
        metadata_events.update({'template_id': np.array([template_ids[i]])})
        metadata_events.update(
            {'stations': np.asarray(data['metadata']['stations']).astype('S')})
        metadata_events.update(
            {'components': np.asarray(data['metadata']['components']).astype('S')})
        metadata_events.update({'origin_times': origin_times[detections_to_keep]})
        metadata_events.update(
            {'correlation_coefficients': correlation_coefficients[detections_to_keep]})
        waveforms_events.update({'waveforms': waveforms[detections_to_keep]})
        list_metadata.append(metadata_events)
        list_waveforms.append(waveforms_events)
    return list_metadata, list_waveforms, cc_sums
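def _example_find_multiplets(templates_mat, moveouts_mat, data, template_ids, net):
    # Hedged usage sketch of find_multiplets(): the input arrays are assumed to
    # have been built elsewhere (float32 template waveforms with shape
    # (n_templates, n_stations, n_components, n_samples), int32 moveouts, and
    # the continuous-data dictionary returned by ReadData). Nothing below is
    # specific to this example's dataset.
    list_metadata, list_waveforms, cc_sums = find_multiplets(
        templates_mat, moveouts_mat, data, template_ids, net,
        threshold_type='rms', device='gpu', template_refinement=False)
    for meta in list_metadata:
        print('Template {}: {} new detections'.format(
            meta['template_id'][0], meta['origin_times'].size))
    return list_metadata, list_waveforms, cc_sums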
n_stations = 20
n_components = 3
n_samples = np.int32(autodet.cfg.template_len * autodet.cfg.sampling_rate)

origin_times = np.zeros(0, dtype=np.float64)
locations = np.zeros((0, 3), dtype=np.float32)
source_index = np.zeros(0, dtype=np.int32)
moveouts = np.zeros((0, n_stations, n_components), dtype=np.float32)
stations = np.zeros((0, n_stations), dtype=np.dtype('|S4'))
components = np.zeros((0, n_components), dtype=np.dtype('|S4'))
waveforms = np.zeros((0, n_stations, n_components, n_samples), dtype=np.float32)

for i, idx in enumerate(I):
    filename = 'detections_{}_'.format(
        udt(features_dataset['days'][idx]).strftime('%Y%m%d'))
    index = features_dataset['indexes'][idx]
    #----------------------------
    with h5.File(autodet.cfg.dbpath + filename + 'wav.h5', mode='r') as f:
        waveforms_ = f['waveforms'][index, :, :, :]
    with h5.File(autodet.cfg.dbpath + filename + 'meta.h5', mode='r') as f:
        stations_ = f['stations'][index, :]
        components_ = f['components'][()]
        moveouts_ = f['moveouts'][index, :, :]
        location_ = f['locations'][index, :]
        origin_t_ = f['origin_times'][index]
        source_idx_ = f['test_source_indexes'][index]
    waveforms = np.concatenate(
        (waveforms, waveforms_.reshape(1, n_stations, n_components, n_samples)),
        axis=0)
            statistical_moment_peaks_1[d, s, c, 0] = np.std(peaks)
            statistical_moment_peaks_2[d, s, c, 0] = scistats.skew(peaks, bias=False)
            statistical_moment_peaks_3[d, s, c, 0] = scistats.kurtosis(peaks, bias=False)
    return np.concatenate((statistical_moment_peaks_1,
                           statistical_moment_peaks_2,
                           statistical_moment_peaks_3), axis=-1)

def features_max_amp(detections):
    n_detections = detections['waveforms'].shape[0]
    n_stations = detections['waveforms'].shape[1]
    n_components = detections['waveforms'].shape[2]
    max_amp = np.zeros((n_detections, n_stations, n_components, 1), dtype=np.float32)
    for d in range(n_detections):
        max_amp[d, :, :, 0] = np.max(detections['waveforms'][d, :, :, :], axis=-1)
    return max_amp

#dates = net.datelist()
dates = [udt('2013,03,17')]

av_features = []
features = []
filenames = []
days = []
indexes = []
for date in dates:
    print('Extract features for day {}'.format(date.strftime('%d-%m-%Y')))
    filename = 'detections_{}_'.format(date.strftime('%Y%m%d'))
    day = date.strftime('%Y,%m,%d')
    detections = autodet.db_h5py.read_detections(filename, attach_waveforms=True)
    #-----------------------------------------
    # NORMALIZE THE TRACES
    for s in range(detections['waveforms'].shape[1]):
def plot_detection(idx):
    """
    Plots the potential template event associated with detection #idx
    """
    plt.figure('detection_{}'.format(idx))
    n_stations = detections['waveforms'].shape[1]
    n_components = detections['waveforms'].shape[2]
    n_samples = detections['waveforms'].shape[3]
    max_time = autodet.cfg.template_len + detections['moveouts'][idx].max()
    plt.suptitle(r'Detection on {}, Event located at '
                 r'{:.2f}$^{{\mathrm{{o}}}}$ / {:.2f}$^{{\mathrm{{o}}}}$ / {:.2f}km'.format(
                     udt(detections['origin_times'][idx]).strftime('%Y-%m-%d | %H:%M:%S'),
                     detections['locations'][idx][0],
                     detections['locations'][idx][1],
                     detections['locations'][idx][2]))
    for s in range(n_stations):
        for c in range(n_components):
            plt.subplot(n_stations, n_components, s * n_components + c + 1)
            time = np.linspace(detections['moveouts'][idx, s, c],
                               detections['moveouts'][idx, s, c] + autodet.cfg.template_len,
                               detections['waveforms'].shape[-1])
            plt.plot(time, detections['waveforms'][idx, s, c, :], lw=0.75,
                     label='{}.{}'.format(detections['stations'][idx, s].decode('utf-8'),
                                          detections['components'][c].decode('utf-8')))
            plt.xlim(0., max_time)
            plt.legend(loc='best', frameon=False, handlelength=0.1)
            if s != n_stations - 1:
                plt.xticks([])
            if s == n_stations - 1:
                plt.xlabel('Time (s)')
    plt.subplots_adjust(top=0.94, bottom=0.085, left=0.065, right=0.955,
                        hspace=0.2, wspace=0.2)
    plt.show()
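def _example_plot_first_detection():
    # Hedged usage sketch: assumes the global 'detections' dictionary has been
    # read for the day (e.g. with autodet.db_h5py.read_detections(filename,
    # attach_waveforms=True), as done elsewhere in this example).
    plot_detection(0)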
elif method == 'SP':
    MV = autodet.moveouts.MV_object(filename, net,
                                    relativeSP=True,
                                    remove_airquakes=True)
t2 = give_time()
print('{:.2f}sec to load the moveouts.'.format(t2 - t1))

test_points = np.arange(MV.n_sources, dtype=np.int32)  # create a vector with indexes for every potential seismic source
test_points = test_points[MV.idx_EQ]  # remove the airquakes by removing some of the indexes

band = [1., 12.]  # used to know where to get the data if folders with different frequency bands exist (not relevant for this example)
n_closest_stations = 20  # stacking is performed on the 20 stations closest to each grid point
envelopes = True  # CNR is calculated with the envelopes of the waveforms
saturation = True  # tanh is used to cut off the 95th percentile of the envelopes

dates = [udt('2013,03,17')]  # from this example, you can see that it is easy to loop over several days and automate the processing...
print('Dates to process:', dates)

for date in dates:
    filename = 'detections_{}_'.format(date.strftime('%Y%m%d'))
    #if isfile(autodet.cfg.dbpath + filename + 'meta.h5'):
    #    # you can comment this condition out, but it is usually useful when you
    #    # process many days and have to re-run the processing and start from
    #    # where your last run stopped
    #    continue
    T1 = give_time()
    #------------------------------------------------------------
    t1 = give_time()
    data = autodet.data.ReadData(date.strftime('%Y,%m,%d'), band)
    t2 = give_time()