import h5py as h5
import numpy as np
from keras import models
from time import time as give_time

path_classification = './classification/'
path_template_db = './template_event_database/'

classifier = models.load_model(path_classification + 'classifier.h5')

features_dataset = {}
with h5.File(path_classification + 'features_dataset.h5', mode='r') as f:
    for item in list(f.keys()):
        features_dataset[item] = f[item][()]

features = features_dataset['features']
n_detections = features.shape[0]
features = features.reshape(n_detections, -1)

t1 = give_time()
predictions = classifier.predict(features)
t2 = give_time()

print("{:.2f}sec to classify the {:d} detections".format(
    t2 - t1, n_detections))

I = np.where(predictions[:, 0] > 0.5)[0]  # indexes of the detections whose first-class probability exceeds 0.5

from keras.models import Model

# build a model that outputs the activations of the classifier's second layer
layer_name = classifier.layers[1].name
intermediate_layer_model = Model(
    inputs=classifier.input, outputs=classifier.get_layer(layer_name).output)
intermediate_output = intermediate_layer_model.predict(features[I, :])
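
The intermediate-layer output extracted above can be reused for unsupervised analysis of the selected detections. A minimal sketch, assuming scikit-learn is available and an arbitrary choice of 5 clusters (neither is part of the original workflow):

# sketch: cluster the intermediate-layer features of the selected detections
# (scikit-learn and n_clusters=5 are illustrative assumptions)
from sklearn.cluster import KMeans

print('Feature matrix shape:', intermediate_output.shape)
kmeans = KMeans(n_clusters=5, random_state=0).fit(intermediate_output)
labels = kmeans.labels_  # one cluster label per selected detection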
Example #2
def compute_envelopes(traces):
    start = give_time()
    traces = envelope_parallel(traces)  # take the upper envelope of the traces
    end = give_time()
    print('Computed the envelopes in {:.2f}sec.'.format(end - start))
    return traces
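
`envelope_parallel` is not defined in this snippet; a minimal serial sketch of what it presumably computes (the analytic-signal envelope of each trace, taken along the sample axis with `scipy.signal.hilbert`) could look like this:

# sketch of a serial envelope computation; envelope_parallel presumably
# parallelizes this over the (station, component) traces
import numpy as np
from scipy.signal import hilbert

def envelope_serial(traces):
    # |analytic signal| = upper envelope of each waveform
    return np.abs(hilbert(traces, axis=-1))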
Example #3
def calc_network_response(data,
                          moveouts,
                          phase,
                          device='gpu',
                          n_closest_stations=None,
                          envelopes=True,
                          test_points=None,
                          saturation=False):
    """
    Calculates the network response of the specified stations and
    components with the given phase and moveouts.\n
    calc_network_response(day_data, moveouts, phase, stations, components,
                              device='gpu', closest=False)\n
    Can run on GPUs if device='gpu'. The argument 'closest' allows to use only the stations closest
    to each grid point in the computation of the network response.
    """
    ANOMALY_THRESHOLD = 1.e-12  # threshold below which a trace is considered garbage
    # (the appropriate value depends on the units of the traces)
    stations = data['metadata']['stations']
    components = data['metadata']['components']
    if isinstance(stations, str):
        stations = [stations]
    if isinstance(components, str):
        components = [components]

    traces = np.array(data['waveforms'], copy=True)
    #-----------------------------
    n_stations = traces.shape[0]
    n_components = traces.shape[1]
    n_samples = traces.shape[2]
    #-----------------------------

    # Initialize the network response object
    network_response = NetworkResponse(stations, components)

    if phase in ('p', 'P'):
        print(
            'Use the P-wave moveouts to compute the Composite Network Response'
        )
        moveout = moveouts.p_relative_samp
    elif phase in ('s', 'S'):
        print(
            'Use the S-wave moveouts to compute the Composite Network Response'
        )
        moveout = moveouts.s_relative_samp
    elif phase in ('sp', 'SP'):
        print(
            'Use the P- and S-wave moveouts to compute the Composite Network Response'
        )
        moveoutS = moveouts.s_relative_p_samp
        moveoutP = moveouts.p_relative_samp
    else:
        raise ValueError(
            "phase should be one of 'p', 's' or 'sp', not '{}'".format(phase))

    smooth_win = cmn.to_samples(cfg.smooth, data['metadata']['sampling_rate'])
    data_availability = np.zeros(n_stations, dtype=np.int32)

    if envelopes:
        window_length = cmn.to_samples(cfg.template_len,
                                       data['metadata']['sampling_rate'])
        start = give_time()
        detection_traces = envelope_parallel(
            traces)  # take the upper envelope of the traces
        end = give_time()
        print('Computed the envelopes in {:.2f}sec.'.format(end - start))
        for s in range(n_stations):
            for c in range(n_components):
                missing_samples = detection_traces[s, c, :] == 0.
                if np.sum(missing_samples) > detection_traces.shape[-1] / 2:
                    continue
                median = np.median(detection_traces[s, c, ~missing_samples])
                mad = cmn.mad(detection_traces[s, c, ~missing_samples])
                if mad < ANOMALY_THRESHOLD:
                    continue
                detection_traces[s, c, :] = (detection_traces[s, c, :] -
                                             median) / mad
                detection_traces[s, c, missing_samples] = 0.
                data_availability[s] += 1
    else:
        # compute the daily MADs (Median Absolute Deviation) to normalize the traces
        # this is an empirical way of correcting for the instruments' sensitivity
        MADs = np.zeros((n_stations, n_components), dtype=np.float32)
        for s in range(n_stations):
            for c in range(n_components):
                traces[s, c, :] -= np.median(traces[s, c, :])
                mad = cmn.mad(traces[s, c, :])
                MADs[s, c] = np.float32(mad)
                if MADs[s, c] != 0.:
                    traces[s, c, :] /= MADs[s, c]
                    data_availability[s] += 1
        detection_traces = np.square(traces)

    # a station's data are considered available if more than one channel was operational
    data_availability = data_availability > 1
    network_response.data_availability = data_availability
    print('{:d} / {:d} available stations'.format(data_availability.sum(),
                                                  data_availability.size))
    if data_availability.sum() < data_availability.size // 2:
        print('Less than half the stations are available, pass!')
        network_response.success = False
        return network_response
    else:
        network_response.success = True
    if n_closest_stations is not None:
        moveouts.get_closest_stations(data_availability, n_closest_stations)
        print(
            'Compute the beamformed network response only with the closest stations to each test seismic source'
        )
    else:
        moveouts.closest_stations_indexes = None

    if saturation:
        print('Saturate the high amplitudes by using hyperbolic tangent.')
        for s in range(n_stations):
            for c in range(n_components):
                # use a non-linear function that saturates after some threshold.
                # here we use tanh, which saturates after x = 1 (tanh(1.) = 0.76, tanh(+infinity) = 1.)
                # around 0, tanh behaves as identity
                saturation_factor = np.percentile(detection_traces[s, c, :],
                                                  95.00)
                if saturation_factor != 0.:
                    detection_traces[s, c, :] = np.tanh(
                        detection_traces[s, c, :] /
                        saturation_factor) * (saturation_factor / (np.pi / 2.))

    #traces = traces.squeeze()
    if phase in ('sp', 'SP'):
        composite, where = clib.network_response_SP(
            np.mean(detection_traces[:, :-1, :], axis=1),
            detection_traces[:, -1, :],
            moveoutP,
            moveoutS,
            smooth_win,
            device=device,
            closest_stations=moveouts.closest_stations_indexes,
            test_points=test_points)
        network_response.sp = True
    else:
        composite, where = clib.network_response(
            traces[:, 0, :],  # North component
            traces[:, 1, :],  # East component
            moveouts.cosine_azimuths,
            moveouts.sine_azimuths,
            moveout,
            smooth_win,
            device=device,
            closest_stations=moveouts.closest_stations_indexes,
            test_points=test_points)
        network_response.sp = False

    network_response.raw_composite = np.array(composite, copy=True)
    # remove the baseline
    window = np.int32(2. * 60. * cfg.sampling_rate)
    composite -= baseline(composite, window)
    smoothed = gaussian_filter1d(composite, np.int32(5. * cfg.sampling_rate))

    network_response.composite = composite
    network_response.where = where
    network_response.smoothed = smoothed
    return network_response
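
To illustrate the median/MAD normalization applied to the envelopes inside `calc_network_response`, here is a minimal stand-alone sketch on a synthetic trace (the explicit MAD computation is a stand-in for `cmn.mad`, without any scaling factor):

# sketch: robust normalization of a single-channel envelope, as done above
import numpy as np

rng = np.random.default_rng(0)
trace = np.abs(rng.normal(size=1000)).astype(np.float32)  # stand-in for one envelope

median = np.median(trace)
mad = np.median(np.abs(trace - median))  # median absolute deviation
ANOMALY_THRESHOLD = 1.e-12
if mad > ANOMALY_THRESHOLD:
    trace = (trace - median) / mad  # unit-free, robust amplitude scale
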
def test_matched_filter(n_templates=1, n_stations=1, n_components=1,
                        template_duration=10, data_duration=86400,
                        sampling_rate=100, step=1, arch='cpu',
                        check_zeros='first', normalize='short'):
    """Test the `matched_filter` function.  

    Generate random data, templates, and moveouts, and run a matched-filter
    search. The templates are sliced from the data, therefore the maximum
    correlation coefficient should always be one if the program ran normally.
    Try `normalize='full'` and/or `arch='precise' or 'gpu'` to achieve better
    numerical precision.

    Parameters
    ----------
    n_templates: scalar, int, optional
        Number of synthetic templates. Defaults to 1.
    n_stations: scalar, int, optional
        Number of stations. Defaults to 1.
    n_components: scalar, int, optional
        Number of components/channels. Defaults to 1.
    template_duration: scalar, float, optional
        Duration, in seconds, of the template waveforms. Defaults to 10s.
    data_duration: scalar, float, optional
        Duration, in seconds, of the data waveforms. Defaults to 86400s.
    sampling_rate: scalar, float, optional
        Sampling frequency (Hz) of the waveforms. Defaults to 100Hz.
    step: scalar, int, optional
        Time interval, in samples, between consecutive correlations.
        Defaults to 1.
    arch: string, optional
        One of `'cpu'`, `'precise'` or `'gpu'`. The `'precise'` implementation
        is a CPU implementation that is slower but more accurate than `'cpu'`.
        The GPU implementation is used if `arch='gpu'`. Default is `'cpu'`.
    check_zeros: string, optional
        Controls the verbosity level at the end of this routine when
        checking zeros in the time series of correlation coefficients (CCs).  
        - False: No messages.  
        - `'first'`: Check zeros on the first template's CCs (recommended).  
        - `'all'`: Check zeros on each template's CCs. It can be useful for
        troubleshooting but in general this would print too many messages.  

        Default is `'first'`.
    normalize: string, optional
        Either "short" or "full" - full is slower but removes the mean of the
        data at every correlation. Short is the original implementation.
        NB: When using normalize="short", the templates and the data sliding
        windows must have zero means (high-pass filter the data if necessary).

    Returns
    -------
    templates: numpy.ndarray
        (n_templates, n_stations, n_components, n_tp_samples) `numpy.ndarray`
        with the random template waveforms generated by the function.
    moveouts: numpy.ndarray
        (n_templates, n_stations, n_components) `numpy.ndarray` with the random
        moveouts generated by the function.
    data: numpy.ndarray
        (n_stations, n_components, n_samples) `numpy.ndarray` with the random
        data generated by the function.
    step: scalar, int
        Time interval, in samples, between consecutive correlations.
    cc_sums: numpy.ndarray, float
        2D (n_templates, n_correlations) `numpy.ndarray`. The number of
        correlations is controlled by `step`.
    """
    from time import time as give_time
    template_times = np.random.random_sample(n_templates) * (data_duration / 2)
    # if step is not 1, round the template times onto the correlation-step
    # grid so that the templates can still be recovered exactly
    if step != 1:
        template_times = np.round(template_times / (step / sampling_rate)) * (step / sampling_rate)
    # determines how many templates there are

    min_moveout = 0
    max_moveout = 10
    moveouts = np.zeros((n_templates, n_stations, n_components))
    for t in range(n_templates):
        for s in range(n_stations):
            moveouts[t, s, :] = (np.random.random_sample(n_components)
                              * (max_moveout - min_moveout)) + min_moveout
    moveouts = np.round(moveouts * sampling_rate)

    # generate data
    n_samples_data = data_duration * sampling_rate
    if float(int(n_samples_data)) == float(n_samples_data):
        n_samples_data = np.int32(n_samples_data)
    else:
        print('The data duration times the sampling rate yields a non-integer number of samples!')
        print('Adjust your input parameters so that this product is an integer.')
        return

    data = np.random.random_sample((n_stations, n_components, n_samples_data))
    for s in range(n_stations):
        for c in range(n_components):
            data[s, c, :] = data[s, c, :] - np.mean(data[s, c, :])

    # generate templates from data
    n_samples_template = template_duration * sampling_rate
    if float(int(n_samples_template)) == float(n_samples_template):
        n_samples_template = np.int32(n_samples_template)
    else:
        print('The template duration times the sampling rate yields a non-integer number of samples!')
        print('Adjust your input parameters so that this product is an integer.')
        return

    n_templates = template_times.size
    templates = np.zeros((n_templates,
                          n_stations,
                          n_components,
                          n_samples_template))
    for t in range(n_templates):
        start_t = template_times[t] * sampling_rate

        template = np.zeros((n_stations, n_components, n_samples_template))
        for s in range(n_stations):
            for c in range(n_components):
                start = int(start_t + np.round(moveouts[t, s, c]))
                stop = int(start_t + n_samples_template + np.round(moveouts[t, s, c]))
                template[s, c, :n_samples_template] = data[s, c, start:stop]

        templates[t, :, :, :n_samples_template] = template

    weights = np.ones((n_templates, n_stations, n_components)) / (n_stations * n_components)

    start_time = give_time()
    cc_sum = matched_filter(templates,
                            moveouts,
                            weights,
                            data,
                            step,
                            arch=arch,
                            check_zeros=check_zeros,
                            normalize=normalize)
    stop_time = give_time()

    print("Matched filter ({}) for {} templates on {} stations/{} "
            "components over {} samples ({} step) ran in {:.3f}s".
          format(arch, n_templates, n_stations, n_components, n_samples_data,
                 step, (stop_time - start_time)))

    return templates, moveouts, data, step, cc_sum
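
As noted in the docstring, the templates are sliced from the data, so each template should reach a correlation coefficient of about one somewhere in the data. A minimal check, assuming the `matched_filter` function is importable in this scope:

# sketch: run the self-test and verify that every template correlates
# perfectly with the data at its original position
templates, moveouts, data, step, cc_sums = test_matched_filter(
    n_templates=2, n_stations=3, n_components=2, arch='cpu')
for t in range(cc_sums.shape[0]):
    print('Template {:d}: max CC = {:.3f}'.format(t, cc_sums[t, :].max()))
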
def SVDWF_multiplets_bulk(template_id, db_path=autodet.cfg.dbpath, db_path_M='matched_filter_1/', db_path_T='template_db_1/', \
                                       WAVEFORMS=None, best=False, normRMS=True, \
                                       n_singular_values=5, max_freq=autodet.cfg.max_freq, attach_raw_data=False):
    from obspy import Stream, Trace
    from scipy.linalg import svd
    from scipy.signal import wiener

    #-----------------------------------------------------------------------------------------------
    T = autodet.db_h5py.read_template('template{:d}'.format(template_id), db_path=db_path+db_path_T)
    #-----------------------------------------------------------------------------------------------
    files_all = glob.glob(db_path + db_path_M + '*multiplets_*meta.h5')
    files     = []
    #------------------------------
    S  = Stream()
    CC = []
    tid_str = str(template_id)
    t1 = give_time()
    for file in files_all:
        with h5.File(file, mode='r') as f:
            if tid_str in f.keys():
                files.append(file[:-len('meta.h5')])
                CC.extend(f[tid_str]['correlation_coefficients'][()].tolist())
    CC = np.float32(CC)
    t2 = give_time()
    print('{:.2f} s to retrieve the correlation coefficients.'.format(t2-t1))
    if len(files) == 0:
        print("None multiplet for template {:d} !! Return None".format(template_id))
        return None
    with h5.File(files[0] + 'meta.h5', mode='r') as f:
        S.stations   = f[tid_str]['stations'][()].astype('U').tolist()
        S.components = f[tid_str]['components'][()].astype('U').tolist()
    ns = len(S.stations)
    nc = len(S.components)
    S.latitude  = T.metadata['latitude']
    S.longitude = T.metadata['longitude']
    S.depth     = T.metadata['depth']
    #------------------------------
    #----------------------------------------------
    if WAVEFORMS is None:
        CC = np.sort(CC)
        if len(CC) > 300:
            CC_thres = CC[-101] # the best 100 detections
        elif len(CC) > 70:
            CC_thres = CC[int(7./10.*len(CC))] # the best 30%
        elif len(CC) > 30:
            CC_thres = np.median(CC) # the best 50%
        elif len(CC) > 10:
            CC_thres = np.percentile(CC, 33.) # the best 66% detections 
        else:
            CC_thres = 0.
        Nstack = np.zeros((ns, nc), dtype=np.float32)
        WAVEFORMS  = []
        Nmulti = 0
        t1 = give_time()
        for file in files:
            if best:
                with h5.File(file + 'meta.h5', mode='r') as fm:
                    selection = np.where(fm[tid_str]['correlation_coefficients'][:] > CC_thres)[0]
                    if selection.size == 0:
                        continue
                with h5.File(file + 'wav.h5', mode='r') as fw:
                    WAVEFORMS.append(fw[tid_str]['waveforms'][selection, :, :, :])
            else:
                with h5.File(file + 'wav.h5', mode='r') as fw:
                    WAVEFORMS.append(fw[tid_str]['waveforms'][()])
            Nmulti += WAVEFORMS[-1].shape[0]
            for m in range(WAVEFORMS[-1].shape[0]):
                for s in range(ns):
                    for c in range(nc):
                        if normRMS:
                            norm = np.sqrt(np.var(WAVEFORMS[-1][m,s,c,:]))
                        else:
                            norm = 1.
                        if norm != 0.:
                            WAVEFORMS[-1][m,s,c,:] /= norm
        t2 = give_time()
        print('{:.2f} s to retrieve the waveforms.'.format(t2-t1))
    elif normRMS:
        for m in range(WAVEFORMS.shape[0]):
            for s in range(ns):
                for c in range(nc):
                    norm = np.sqrt(np.var(WAVEFORMS[m,s,c,:]))
                    if norm != 0.:
                        WAVEFORMS[m,s,c,:] /= norm
    else:
        pass
    WAVEFORMS = np.vstack(WAVEFORMS)
    WAVEFORMS = WAVEFORMS.reshape(-1, ns, nc, WAVEFORMS.shape[-1])
    print(WAVEFORMS.shape)
    filtered_data = np.zeros_like(WAVEFORMS)
    for s in range(ns):
        for c in range(nc):
            filtered_data[:,s,c,:] = SVDWF(WAVEFORMS[:,s,c,:], n_singular_values, max_freq=max_freq)
            #filtered_data[:,s,c,:] = spectral_filtering(WAVEFORMS[:,s,c,:], SNR_thres=5., max_freq=max_freq)
            mean = np.mean(filtered_data[:,s,c,:], axis=0)
            mean /= np.abs(mean).max()
            S += Trace(data=mean)
            S[-1].stats.station = S.stations[s]
            S[-1].stats.channel = S.components[c]
            S[-1].stats.sampling_rate = autodet.cfg.sampling_rate
    S.data = filtered_data
    if attach_raw_data:
        S.raw_data = WAVEFORMS
    S.Nmulti = Nmulti
    return S
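
A minimal usage sketch for `SVDWF_multiplets_bulk` (the template id and the default database folder names are placeholders; adjust them to your own database layout):

# sketch: build the SVDWF-stacked waveforms for one template
# (template id 0 is an arbitrary placeholder)
S_stack = SVDWF_multiplets_bulk(0, best=True, n_singular_values=5)
if S_stack is not None:
    print('Stacked {:d} multiplets on {:d} stations / {:d} components'.format(
        S_stack.Nmulti, len(S_stack.stations), len(S_stack.components)))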
Example #6
# if you have Nvidia GPUs:
device = 'gpu'
# else:
# device = 'cpu'

print('The codes will run on {}'.format(device))
print('If this was not your intention, edit this script and comment/uncomment the appropriate lines for the variable "device".')

# whether you use the P-wave moveouts to align the vertical trace or not
#method = 'S'
method = 'SP'

filename = 'subgrid_downsampled'

t1 = give_time()
if method == 'S':
    MV = autodet.moveouts.MV_object(filename, net, \
                                    relative=True, \
                                    remove_airquakes=True)
elif method == 'SP':
    MV = autodet.moveouts.MV_object(filename, net, \
                                    relativeSP=True, \
                                    remove_airquakes=True)
t2 = give_time()
print('{:.2f}sec to load the moveouts.'.format(t2-t1))

test_points = np.arange(MV.n_sources, dtype=np.int32) # create a vector with indexes for every potential seismic source
test_points = test_points[MV.idx_EQ] # remove the airquakes by removing some of the indexes

band = [1., 12.] # used to know where to get the data if folders with different frequency bands exist (not relevant for this example)
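
From here, the moveouts and test points would typically be passed to calc_network_response (Example #3 above) to compute the composite network response. A minimal sketch, assuming `data` is a one-day data dictionary with the 'waveforms' and 'metadata' fields expected by that function (its loading is not shown here), and an arbitrary choice of 20 closest stations:

# sketch: compute the composite network response for one day of data
# ('data' and n_closest_stations=20 are illustrative assumptions)
network_response = calc_network_response(
    data, MV, method, device=device, n_closest_stations=20,
    envelopes=True, test_points=test_points)
if network_response.success:
    print('Peak composite network response: {:.2f}'.format(
        network_response.composite.max()))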