def plot_template_event(idx):
    filename = 'detections_{}_'.format(
        udt(features_dataset['days'][idx]).strftime('%Y%m%d'))
    index = features_dataset['indexes'][idx]
    #----------------------------
    with h5.File(autodet.cfg.dbpath + filename + 'wav.h5', mode='r') as f:
        waveforms_ = f['waveforms'][index, :, :, :]
    with h5.File(autodet.cfg.dbpath + filename + 'meta.h5', mode='r') as f:
        stations_ = f['stations'][index, :]
        components_ = f['components'][()]
        moveouts_ = np.int32(f['moveouts'][index, :, :] *
                             autodet.cfg.sampling_rate)
        location_ = f['locations'][index, :]
        origin_t_ = udt(f['origin_times'][index])
    n_stations = len(stations_)
    n_components = len(components_)
    t_min = 0
    t_max = moveouts_.max() + waveforms_.shape[-1]
    fig = plt.figure('detection_{:d}'.format(idx), figsize=(27, 17))
    plt.suptitle('Detection on {} from {:.2f}|{:.2f}|{:.2f}km'.format(
        origin_t_.strftime('%Y,%m,%d %H:%M:%S'), location_[0], location_[1],
        location_[2]))
    for s in range(n_stations):
        for c in range(n_components):
            plt.subplot(n_stations, n_components, s * n_components + c + 1)
            time = np.arange(waveforms_.shape[-1]) + moveouts_[s, c]
            plt.plot(time,
                     waveforms_[s, c, :],
                     label='{}.{}'.format(stations_[s].astype('U'),
                                          components_[c].astype('U')))
            plt.xlim(t_min, t_max)
            plt.legend(loc='upper left', frameon=False)
    plt.subplots_adjust(top=0.94, bottom=0.08, left=0.05, right=0.95)
    plt.show()
def ReadData(date, band):
    """
    ReadData(date, band)
    """
    date = udt(date)
    filename = 'data_{:d}_{:d}/data_{}.h5'.format(int(band[0]), int(band[1]),
                                                  date.strftime('%Y%m%d'))
    data = {}
    data.update({'metadata': {}})
    with h5.File(cfg.input_path + filename, mode='r') as f:
        data.update({'waveforms': f['waveforms'][()]})
        for item in f['metadata'].keys():
            if len(f['metadata'][item][()]) == 1:
                data['metadata'].update({item: f['metadata'][item][()][0]})
            else:
                data['metadata'].update({item: f['metadata'][item][()]})
    data['metadata'].update({'date': udt(data['metadata']['date'])})
    for item in ['stations', 'components']:
        data['metadata'][item] = data['metadata'][item].astype('U')
    return data
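# Hedged usage sketch for ReadData. The date and frequency band are taken from
# the example further down ('2013,03,17', band 1-12 Hz); the call assumes the
# corresponding data_1_12/data_20130317.h5 file exists under cfg.input_path.
data_example = ReadData('2013,03,17', [1., 12.])
print(data_example['waveforms'].shape)        # (n_stations, n_components, n_samples)
print(data_example['metadata']['stations'])   # station codes as unicode strings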
Example #3
def plot_composite_network_response(composite):
    """
    Plots the composite network response, shows which peaks are detections and the time-varying threshold.
    """
    plt.figure('composite_network_response')
    ax1     = plt.subplot2grid((1, 4), (0,0))
    _, _, _ = plt.hist(composite, bins=100)
    mad     = np.median(np.abs(composite - np.median(composite)))
    threshold1 = np.median(composite) + autodet.cfg.ratio_threshold * mad
    plt.axvline(threshold1, lw=2, ls='--', color='k', label=r'Global $\mathrm{{median}}\ + {:d} \times \mathrm{{MAD}}$'.format(int(autodet.cfg.ratio_threshold)))
    plt.legend(loc='upper right', fancybox=True)
    plt.xlabel('Composite Network Response')
    plt.ylabel('Frequency')
    #--------------------------------------------------
    ax2     = plt.subplot2grid((1, 4), (0, 1), colspan=3)
    plt.plot(composite)
    T0 = udt(udt(detections['origin_times'][0]).strftime('%Y,%m,%d')).timestamp
    # plot all candidate detections at once (markers only)
    idx = ((detections['origin_times'] - T0)
           * autodet.cfg.sampling_rate).astype(np.int32)
    plt.plot(idx, detections['composite_network_response'], ls='', marker='o',
             color='C3', label='Candidate Template Events')
    threshold2 = autodet.template_search.time_dependent_threshold(composite, np.int32(0.5 * 3600. * autodet.cfg.sampling_rate))
    plt.plot(threshold2, color='C5', ls='--', lw=2, label=r'Sliding $\mathrm{{median}}\ + {:d} \times \mathrm{{MAD}}$'.format(int(autodet.cfg.ratio_threshold)))
    xticks       = np.arange(0, composite.size+1, np.int32(2.*3600.*autodet.cfg.sampling_rate))
    plt.legend(loc='upper right', fancybox=True)
    xtick_labels = [udt(X/autodet.cfg.sampling_rate).strftime('%H:%M:%S') for X in xticks]
    plt.xticks(xticks, xtick_labels)
    plt.xlim(0, composite.size+1)
    plt.grid(axis='x')
    plt.xlabel('Time')
    plt.ylabel('Composite Network Response')
    #----------------------------
    plt.subplots_adjust(top=0.88,
                        bottom=0.05,
                        left=0.07,
                        right=0.97,
                        hspace=0.245,
                        wspace=0.275)
    plt.show()
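# Note: besides the `composite` argument, plot_composite_network_response()
# also reads the global `detections` dictionary ('origin_times',
# 'composite_network_response'), which must therefore be defined first.
# Hedged illustration of the global threshold drawn in the left panel
# (median + ratio_threshold * MAD); the array below is synthetic and purely
# illustrative.
import numpy as np
cnr_demo = np.abs(np.random.randn(100000)).astype(np.float32)
mad_demo = np.median(np.abs(cnr_demo - np.median(cnr_demo)))
threshold_demo = np.median(cnr_demo) + autodet.cfg.ratio_threshold * mad_demo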
def find_multiplets(templates_mat,
                    moveouts_mat,
                    data,
                    template_ids,
                    net,
                    threshold_type='rms',
                    weights_mat=None,
                    buf=True,
                    device='gpu',
                    template_refinement=False):
    """
    Find repetitions of the template waveforms, i.e. multiplets

    Parameters
    ----------
    templates_mat : (n_templates, n_stations, n_components,
                     n_samples_template) array_like
        The template waveforms (float32)
    moveouts_mat : (n_templates, n_stations, n_components) array_like
        The moveouts, in samples (int32)
    data : dict
        Dictionary returned by ReadData(), holding the continuous data
        ('waveforms': float32 array of shape
        (n_stations, n_components, n_samples_continuous_data)) and its
        'metadata'.
    template_ids : (n_templates) array_like
        The template indexes (int32)
    net : Network object from the dataset module
    threshold_type : string, optional
        Default is 'rms', the other option is 'mad'. Determines whether the
        detection threshold uses the rms or the mad of the correlation
        coefficient time series.
    weights_mat : (n_templates, n_stations, n_components) array_like, optional
        Default is None, which assigns the same weight to all
        stations / components. The user can provide a float32 array of weights
        that will be used to calculate weighted average correlation
        coefficients.
    buf : bool, optional
        Default is True, which removes detections occurring in the data buffer.
    device : string, optional
        Default is 'gpu'. Determines whether Fast Matched Filter (FMF) runs on
        GPUs or CPUs (when 'cpu' is given instead).
    template_refinement : bool, optional
        Default is False. If True, limits the number of detections to
        n_to_keep=50 per template. This choice reduces the time spent writing
        data and the size of the output. It is meant to be used during
        intermediate matched filter searches that aim at improving the quality
        of the templates by stacking the detections.

    Returns
    -------
    list_metadata : (n_templates) list
        List containing n_templates dictionaries with metadata
    list_waveforms : (n_templates) list
        List containing n_templates (n_stations, n_components,
        n_samples_extracted) arrays storing the waveforms of the newly
        detected events.
    cc_sums : (n_templates, n_correlations) array_like
        Summed correlation coefficients output by FMF.
    """
    if template_refinement:
        n_to_keep = 50

    threshold_type = threshold_type.lower()

    nt, ns, nc, Nsamp = templates_mat.shape

    step = np.int32(
        cmn.to_samples(cfg.matched_filter_step,
                       data['metadata']['sampling_rate']))

    n_stations = data['waveforms'].shape[0]
    n_components = data['waveforms'].shape[1]
    n_samples_data = data['waveforms'].shape[2]
    n_samples = np.int32(cfg.multiplet_len * data['metadata']['sampling_rate'])

    # select 10s before the detection
    buffer_extracted_events = cfg.buffer_extracted_events

    if weights_mat is None:
        # default: equal weights for all stations / components; use float32
        # so that the in-place normalization below is valid
        weights_mat = np.ones(moveouts_mat.shape, dtype=np.float32)
        for n in range(weights_mat.shape[0]):
            weights_mat[n, :] /= weights_mat[n, :].sum()

    CC_SUMS = []
    Nparts = 2
    L = ns // Nparts + 1
    for i in range(Nparts):
        # to be memory friendly, we subdivide the network into Nparts
        # and the resulting correlation coefficients are then manually stacked
        # in a separate loop
        id1 = i * L
        id2 = (i + 1) * L
        if id2 > ns:
            id2 = ns
        cc_sums = fmf.matched_filter(templates_mat[:, id1:id2, :, :],
                                     moveouts_mat[:, id1:id2],
                                     weights_mat[:, id1:id2, :],
                                     data['waveforms'][id1:id2, :, :],
                                     step,
                                     arch=device)
        CC_SUMS.append(cc_sums)
    cc_sums = CC_SUMS[0]
    for i in range(1, Nparts):
        # stack the correlation coefficients
        cc_sums += CC_SUMS[i]

    cc_sums[np.isnan(cc_sums)] = 0

    list_metadata = []
    list_waveforms = []
    for i in range(nt):
        cc_sum = cc_sums[i, :]

        if threshold_type == 'rms':
            cc_sum -= np.mean(cc_sum)
            threshold = cfg.matched_filter_threshold * cmn.rms(cc_sum)
        elif threshold_type == 'mad':
            cc_sum -= np.median(cc_sum)
            threshold = cfg.matched_filter_threshold * cmn.mad(cc_sum)
        else:
            raise ValueError("threshold_type should be 'rms' or 'mad'")
        # ------------------
        cc_idx = np.argwhere(cc_sum > threshold)
        detections = cc_idx * step

        if buf:
            # remove detections from buffer
            limit = np.int32(cfg.data_buffer *
                             data['metadata']['sampling_rate'])
            idx = detections >= limit
            cc_idx = cc_idx[idx]
            detections = detections[idx]

            limit = np.int32(
                (86400 + cfg.data_buffer) * data['metadata']['sampling_rate'])
            idx = detections < limit
            cc_idx = cc_idx[idx]
            detections = detections[idx]

        # only keep the highest correlation coefficient for grouped detections;
        # we assume the last component is the vertical component
        d_mv = moveouts_mat[i, :, 0] - moveouts_mat[i, :, -1]
        # fix the maximum window size to 3 times the template duration,
        # the minimum window size to 1 template duration, and in between
        # choose an adaptive size based on the median P-S time
        search_win = min(
            np.int32(3. * cfg.template_len * cfg.sampling_rate / step),
            max(np.int32(1. * np.median(d_mv[d_mv != 0]) / step),
                np.int32(cfg.template_len * cfg.sampling_rate / step)))
        for j in range(cc_idx.size):
            idx = np.arange(max(0, cc_idx[j] - search_win // 2),
                            min(cc_sum.size - 1, cc_idx[j] + search_win // 2),
                            dtype=np.int32)
            idx_to_update = np.where(cc_idx == cc_idx[j])[0]
            cc_idx[idx_to_update] = np.argmax(cc_sum[idx]) + idx[0]

        cc_idx = np.unique(cc_idx)
        detections = cc_idx * step

        # after this step, we can still have detections closer than search_win / 2
        cc_idx = list(cc_idx)
        Nrm = 0
        for j in range(1, detections.size):
            if (cc_idx[j - Nrm] - cc_idx[j - Nrm - 1]) < search_win // 2:
                if cc_sum[cc_idx[j - Nrm]] > cc_sum[cc_idx[j - Nrm - 1]]:
                    cc_idx.remove(cc_idx[j - Nrm - 1])
                else:
                    cc_idx.remove(cc_idx[j - Nrm])
                Nrm += 1
        cc_idx = np.asarray(cc_idx)
        detections = cc_idx * step

        n_multiplets = len(detections)
        # ------------------------------------------------------
        metadata_events = {}
        waveforms_events = {}
        origin_times = np.zeros(n_multiplets, dtype=np.float64)
        correlation_coefficients = np.zeros(n_multiplets, dtype=np.float32)
        waveforms = np.zeros(
            (n_multiplets, n_stations, n_components, n_samples),
            dtype=np.float32)
        idx_min = 0  # can't extract continuous data before index 0
        idx_max = n_samples_data  # can't extract continuous data after
        #                           the last sample of the day
        for d in range(n_multiplets):
            origin_time = udt(data['metadata']['date']) \
                          + detections[d] / cfg.sampling_rate
            origin_times[d] = origin_time.timestamp \
                - buffer_extracted_events \
                - cfg.data_buffer
            correlation_coefficients[d] = cc_sum[cc_idx[d]]
            # -----------------------------------------
            # take care of not selecting out-of-bound indexes:
            id1 = detections[d] - np.int32(
                buffer_extracted_events * cfg.sampling_rate)
            if id1 < idx_min:
                # will have to zero-pad the beginning of the extracted sequence
                dn_b = idx_min - id1
                id2 = np.int32(id1 + n_samples)
                id1 = np.int32(idx_min)
            else:
                dn_b = 0
                id2 = id1 + n_samples
            if id2 > idx_max:
                # will have to zero-pad the end of the extracted sequence
                dn_e = id2 - idx_max
                id2 = np.int32(idx_max)
            else:
                dn_e = 0
            waveforms[d, :, :, :] = np.concatenate(
                (np.zeros((n_stations, n_components, dn_b),
                          dtype=np.float32), data['waveforms'][:, :, id1:id2],
                 np.zeros((n_stations, n_components, dn_e), dtype=np.float32)),
                axis=-1)
            # -----------------------------------------
        if template_refinement and origin_times.size > n_to_keep:
            # only keep the n_to_keep best detections
            threshold_CC = np.sort(correlation_coefficients)[-n_to_keep]
            detections_to_keep = np.where(
                correlation_coefficients >= threshold_CC)[0]
        else:
            detections_to_keep = np.arange(origin_times.size)
        metadata_events.update({'template_id': np.array([template_ids[i]])})
        metadata_events.update(
            {'stations': np.asarray(data['metadata']['stations']).astype('S')})
        metadata_events.update({
            'components':
            np.asarray(data['metadata']['components']).astype('S')
        })
        metadata_events.update(
            {'origin_times': origin_times[detections_to_keep]})
        metadata_events.update({
            'correlation_coefficients':
            correlation_coefficients[detections_to_keep]
        })
        waveforms_events.update({'waveforms': waveforms[detections_to_keep]})

        list_metadata.append(metadata_events)
        list_waveforms.append(waveforms_events)
    return list_metadata, list_waveforms, cc_sums
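# Hedged usage sketch for find_multiplets (illustrative only). The template,
# moveout and weight arrays, the template_ids vector and the Network object
# `net` are assumed to have been built beforehand with the shapes given in the
# docstring; `data` is the dictionary returned by autodet.data.ReadData().
# list_metadata, list_waveforms, cc_sums = find_multiplets(
#     templates_mat, moveouts_mat, data, template_ids, net,
#     threshold_type='rms', weights_mat=None, buf=True, device='gpu')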
n_stations = 20
n_components = 3
n_samples = np.int32(autodet.cfg.template_len * autodet.cfg.sampling_rate)

origin_times = np.zeros(0, dtype=np.float64)
locations = np.zeros((0, 3), dtype=np.float32)
source_index = np.zeros(0, dtype=np.int32)
moveouts = np.zeros((0, n_stations, n_components), dtype=np.float32)
stations = np.zeros((0, n_stations), dtype=np.dtype('|S4'))
components = np.zeros((0, n_components), dtype=np.dtype('|S4'))
waveforms = np.zeros((0, n_stations, n_components, n_samples),
                     dtype=np.float32)

for i, idx in enumerate(I):
    filename = 'detections_{}_'.format(
        udt(features_dataset['days'][idx]).strftime('%Y%m%d'))
    index = features_dataset['indexes'][idx]
    #----------------------------
    with h5.File(autodet.cfg.dbpath + filename + 'wav.h5', mode='r') as f:
        waveforms_ = f['waveforms'][index, :, :, :]
    with h5.File(autodet.cfg.dbpath + filename + 'meta.h5', mode='r') as f:
        stations_ = f['stations'][index, :]
        components_ = f['components'][()]
        moveouts_ = f['moveouts'][index, :, :]
        location_ = f['locations'][index, :]
        origin_t_ = f['origin_times'][index]
        source_idx_ = f['test_source_indexes'][index]
    waveforms = np.concatenate(
        (waveforms, waveforms_.reshape(1, n_stations, n_components,
                                       n_samples)),
        axis=0)
Example #6
                statistical_moment_peaks_1[d, s, c, 0] = np.std(peaks)
                statistical_moment_peaks_2[d, s, c, 0] = scistats.skew(peaks, bias=False)
                statistical_moment_peaks_3[d, s, c, 0] = scistats.kurtosis(peaks, bias=False)
    return np.concatenate((statistical_moment_peaks_1,
                           statistical_moment_peaks_2,
                           statistical_moment_peaks_3), axis=-1)

def features_max_amp(detections):
    n_detections = detections['waveforms'].shape[0]
    n_stations   = detections['waveforms'].shape[1]
    n_components = detections['waveforms'].shape[2]
    max_amp      = np.zeros((n_detections, n_stations, n_components, 1), dtype=np.float32)
    for d in range(n_detections):
        max_amp[d, :, :, 0] = np.max(detections['waveforms'][d, :, :, :], axis=-1)
    return max_amp
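# Note: the loop in features_max_amp is equivalent to a single vectorized
# reduction over the time axis, e.g. (illustrative alternative):
# max_amp = np.max(detections['waveforms'], axis=-1)[..., np.newaxis]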

#dates = net.datelist()
dates = [udt('2013,03,17')]

av_features = []
features    = []
filenames   = []
days        = []
indexes     = []

for date in dates:
    print('Extract features for day {}'.format(date.strftime('%d-%m-%Y')))
    filename      = 'detections_{}_'.format(date.strftime('%Y%m%d'))
    day           = date.strftime('%Y,%m,%d')
    detections    = autodet.db_h5py.read_detections(filename, attach_waveforms=True)
    #-----------------------------------------
    #        NORMALIZE THE TRACES
    for s in range(detections['waveforms'].shape[1]):
Example #7
def plot_detection(idx):
    """
    Plots the potential template event associated with detection #idx
    """
    plt.figure('detection_{}'.format(idx))
    n_stations   = detections['waveforms'].shape[1]
    n_components = detections['waveforms'].shape[2]
    n_samples    = detections['waveforms'].shape[3]
    max_time     = autodet.cfg.template_len + detections['moveouts'][idx].max()
    plt.suptitle(r'Detection on {}, Event located at {:.2f}$^{{\mathrm{{o}}}}$ / '
                 r'{:.2f}$^{{\mathrm{{o}}}}$ / {:.2f}km'.format(
                     udt(detections['origin_times'][idx]).strftime('%Y-%m-%d | %H:%M:%S'),
                     detections['locations'][idx][0],
                     detections['locations'][idx][1],
                     detections['locations'][idx][2]))
    for s in range(n_stations):
        for c in range(n_components):
            plt.subplot(n_stations, n_components, s * n_components + c + 1)
            time = np.linspace(detections['moveouts'][idx, s, c],
                               detections['moveouts'][idx, s, c] + autodet.cfg.template_len,
                               detections['waveforms'].shape[-1])
            plt.plot(time, detections['waveforms'][idx, s, c, :], lw=0.75,
                     label='{}.{}'.format(detections['stations'][idx, s].decode('utf-8'),
                                          detections['components'][c].decode('utf-8')))
            plt.xlim(0., max_time)
            plt.legend(loc='best', frameon=False, handlelength=0.1)
            if s != n_stations-1:
                plt.xticks([])
            if s == n_stations-1:
                plt.xlabel('Time (s)')
    plt.subplots_adjust(top=0.94,
            bottom=0.085,
            left=0.065,
            right=0.955,
            hspace=0.2,
            wspace=0.2)
    plt.show()
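# Hedged usage sketch: plot_detection() reads the global `detections`
# dictionary returned by autodet.db_h5py.read_detections(filename,
# attach_waveforms=True) (see the previous snippet); with that in place, the
# first candidate event is plotted with:
# plot_detection(0)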
Example #8
elif method == 'SP':
    MV = autodet.moveouts.MV_object(filename, net, \
                                    relativeSP=True, \
                                    remove_airquakes=True)
t2 = give_time()
print('{:.2f}sec to load the moveouts.'.format(t2-t1))

test_points = np.arange(MV.n_sources, dtype=np.int32) # create a vector with indexes for every potential seismic source
test_points = test_points[MV.idx_EQ] # remove the airquakes by removing some of the indexes

band = [1., 12.] # used to know where to get the data if folders with different frequency bands exist (not relevant for this example)
n_closest_stations = 20 # stacking is performed on the 20 stations closest to each grid point
envelopes  = True  # CNR is calculated with the envelopes of the waveforms
saturation = True  # tanh is used to cutoff the 95th percentile of the envelopes
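# Hedged sketch of the saturation mentioned above (illustrative only, not
# necessarily the exact autodet implementation): saturate large envelope
# amplitudes with a tanh scaled by the trace's 95th percentile.
import numpy as np
def saturate_envelope(envelope):
    p95 = np.percentile(envelope, 95.)
    return np.tanh(envelope / p95)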

dates = [udt('2013,03,17')] # from this example, you can see that it is easy to loop over several days and automate the processing...

print('Dates to process:', dates)

for date in dates:
    filename = 'detections_{}_'.format(date.strftime('%Y%m%d'))
    #if isfile(autodet.cfg.dbpath + filename + 'meta.h5'):
    #    # you can comment out this condition, but it is usually useful when you
    #    # process many days and have to re-run the processing, starting from
    #    # where your last run stopped
    #    continue
    T1 = give_time()
    #------------------------------------------------------------
    t1 = give_time()
    data = autodet.data.ReadData(date.strftime('%Y,%m,%d'), band)
    t2 = give_time()