def getMetrics(trace):
    """Compute six summary values for a single trace.

    The amplitude statistics (mean, median, standard deviation, maximum) are
    taken from ``trace.data`` *before* the taper is applied.  The trace is
    then tapered via ``trace.taper(...)`` and ``trace.data`` is read again
    for the frequency-based values.

    NOTE(review): ``df`` is not defined inside this function; it is looked
    up in the enclosing/module scope.  It is passed as the second argument
    of ``central_frequency_unwindowed`` and the fourth of ``bandpass`` --
    presumably the sampling rate; confirm against the module globals.

    Args:
        trace: object exposing ``.data`` (with ``mean()`` and ``std()``
            methods) and a ``.taper(...)`` method.

    Returns:
        list: ``[mean, median, stdv, maximum, repFreq, sumEnergy]``.
    """
    # Amplitude statistics on the samples as they are before tapering.
    raw = trace.data
    amplitude_stats = [raw.mean(), np.median(raw), raw.std(), np.amax(raw)]

    # Taper the trace, then re-read its samples for the spectral measures.
    trace.taper(type='hamming', max_percentage=0.05, max_length=5)
    samples = trace.data
    n_samples = len(samples)

    rep_freq = central_frequency_unwindowed(samples, df)

    # Band-limit to 0.01 .. 1.5625 and sum the welch() output.
    band_limited = bandpass(samples, 0.01, 1.5625, df)
    spectrum = welch(band_limited, np.hamming(n_samples),
                     next_pow_2(n_samples))
    sum_energy = np.sum(spectrum)

    return amplitude_stats + [rep_freq, sum_energy]
def preprocess(df):
    """Add polarization and per-component spectral features to ``df``.

    ``df`` is modified in place: ten metadata columns are dropped and the
    feature columns are added.  A column selection of the result is returned.

    Args:
        df: pandas DataFrame with at least the columns that are dropped
            below, the per-row trace columns ``'E'``, ``'N'``, ``'Z'``, and
            ``'trace_id'``, ``'snr_db_E'``, ``'snr_db_N'``, ``'snr_db_Z'``.

    Returns:
        DataFrame holding ``trace_id`` plus the model feature columns.
    """
    components = ('E', 'N', 'Z')

    metadata_columns = [
        'receiver_latitude', 'receiver_longitude', 'receiver_elevation_m',
        'p_arrival_sample', 'p_travel_sec', 's_arrival_sample',
        'source_origin_time', 'source_latitude', 'source_longitude',
        'source_depth_km'
    ]
    df.drop(metadata_columns, axis=1, inplace=True)

    # Degree of rectilinearity (polarization): flinn() is called once per
    # row with the traces ordered Z, N, E and must yield four values.
    print(' Processing - Degree of rectiliniarity (polarization)')
    per_row = df.apply(
        lambda row: flinn([row['Z'], row['N'], row['E']]), axis=1)
    azimuth, incidence, rectilinearity, planarity = zip(*per_row)
    df['rect_azimuth'] = azimuth
    df['rect_incidence'] = incidence
    df['rect_rectilinearity'] = rectilinearity
    df['rect_planarity'] = planarity

    def _rms_of_spectrum(samples):
        # NOTE(review): only the real part of the FFT is used here, not the
        # magnitude -- confirm this is intended.
        real_part = np.real(np.fft.fft(samples))
        return np.sqrt(np.mean(np.square(real_part)))

    def _sqrt_peak_power(samples):
        # Element [1] of the periodogram() result is the spectrum itself.
        spectrum = signal.periodogram(
            samples, 100, 'flattop', scaling='spectrum')[1]
        return np.sqrt(spectrum.max())

    def _central_frequency(samples):
        return central_frequency_unwindowed(samples, fs=100)

    # Trace-by-trace features, one pass per component.
    print(' Starting Trace-By-Trace feature processing')
    for comp in components:
        print(' Processing Spectral Centroid')
        df['spectral_centroid_' + comp] = df[comp].apply(_spectral_centroid)

        print(' RMS of frequency amplitude')
        df['rms_freq_amp_' + comp] = df[comp].apply(_rms_of_spectrum)

        print(' Maximum power of frequency amplitude')
        df['max_power_freq_amp_' + comp] = df[comp].apply(_sqrt_peak_power)

        print(' Dominant frequency')
        df['dominant_freq_' + comp] = df[comp].apply(_central_frequency)

    # Assemble trace_id and the model columns in their fixed output order.
    selected = ['trace_id']
    selected += ['snr_db_' + comp for comp in components]
    selected += ['spectral_centroid_' + comp for comp in components]
    selected += [
        'rect_azimuth', 'rect_incidence', 'rect_rectilinearity',
        'rect_planarity'
    ]
    for prefix in ('rms_freq_amp_', 'max_power_freq_amp_', 'dominant_freq_'):
        selected += [prefix + comp for comp in components]
    return df[selected]
def plotBandSpec(trace, mode='save', low=24.99, high=0.001):
    """Demean, detrend and bandpass ``trace``, then print two spectral values.

    The detrend and filter calls are made on the ``trace`` object that was
    passed in; the copy is taken only afterwards, to read the samples.

    Args:
        trace: object exposing ``detrend``, ``filter``, ``copy`` and
            ``.data``.
        mode: not used by the code visible in this block.
        low: passed to the filter as ``freqmax`` (the upper corner).
        high: passed to the filter as ``freqmin`` (the lower corner).

    Returns:
        None.  The central frequency and bandwidth are printed.
    """
    # Value handed to the two spectral helpers as their second argument.
    sample_rate = 50

    # Demean, detrend, bandpass filter.
    for detrend_kind in ('demean', 'linear'):
        trace.detrend(detrend_kind)
    trace.filter(
        'bandpass', freqmin=high, freqmax=low, corners=2, zerophase=True)

    # Read the samples from a copy of the processed trace.
    samples = trace.copy().data

    central = central_frequency_unwindowed(samples, sample_rate)
    width = bandwidth(samples, sample_rate)

    report = ''.join(
        ("Central frequency: ", str(central), "\nBandwidth: ", str(width)))
    print(report)
def preprocess(df):
    """Build the extended feature table from the E/N/Z traces in ``df``.

    ``df`` is modified in place.  For every row the function adds 250- and
    1000-point moving averages of each component, the four ``flinn`` outputs
    for the raw and averaged traces, envelope-similarity statistics for the
    raw components, and spectral plus waveform-correlation features for all
    nine traces (raw and averaged).

    Args:
        df: pandas DataFrame with per-row trace columns ``'E'``, ``'N'``,
            ``'Z'`` and a ``'trace_id'`` column.

    Returns:
        DataFrame holding ``trace_id`` plus the model feature columns.
    """
    components = ('E', 'N', 'Z')
    ma_widths = (250, 1000)
    # '' denotes the raw traces; the others the moving-average variants.
    suffixes = [''] + ['_MA{}'.format(width) for width in ma_widths]
    # Raw components first, then the MA250 set, then the MA1000 set.
    traces = [comp + suffix for suffix in suffixes for comp in components]
    rect_quantities = ('azimuth', 'incidence', 'rectilinearity', 'planarity')
    stat_names = ('deep_max', 'deep_mean', 'shallow_max', 'shallow_mean')

    def _store_four(columns, per_row):
        # Each per-row result must contain exactly four values; they are
        # transposed and written to the four named columns in order.
        first, second, third, fourth = zip(*per_row)
        for column, values in zip(columns, (first, second, third, fourth)):
            df[column] = values

    def _rms_of_spectrum(samples):
        # NOTE(review): only the real part of the FFT is used here, not the
        # magnitude -- confirm this is intended.
        real_part = np.real(np.fft.fft(samples))
        return np.sqrt(np.mean(np.square(real_part)))

    def _sqrt_peak_power(samples):
        # Element [1] of the periodogram() result is the spectrum itself.
        spectrum = signal.periodogram(
            samples, 100, 'flattop', scaling='spectrum')[1]
        return np.sqrt(spectrum.max())

    def _central_frequency(samples):
        return central_frequency_unwindowed(samples, fs=100)

    # Rolling AVG traces
    print('Calculate rolling Avg')
    for width in ma_widths:
        for comp in components:
            df['{}_MA{}'.format(comp, width)] = df[comp].apply(
                lambda samples, width=width: moving_average(samples, width))

    # Degree of rectilinearity (polarization) for raw and averaged traces;
    # flinn() always receives the traces ordered Z, N, E.
    print(' Processing - Degree of rectiliniarity (polarization)')
    for suffix in suffixes:
        z_col, n_col, e_col = 'Z' + suffix, 'N' + suffix, 'E' + suffix
        _store_four(
            ['rect_' + quantity + suffix for quantity in rect_quantities],
            df.apply(
                lambda row: flinn([row[z_col], row[n_col], row[e_col]]),
                axis=1))

    # Trace-by-trace features
    print(' Starting Trace-By-Trace feature processing')

    # Envelope similarity (raw components only)
    print(' Processing - Envelope Similarity')
    for comp in components:
        _store_four(
            ['{}_env_sim_{}'.format(comp, stat) for stat in stat_names],
            df[comp].apply(envelope_similarity))

    for name in traces:
        print(' Processing Spectral Centroid - {}'.format(name))
        df['spectral_centroid_' + name] = df[name].apply(_spectral_centroid)

        print(' RMS of frequency amplitude - {}'.format(name))
        df['rms_freq_amp_' + name] = df[name].apply(_rms_of_spectrum)

        print(' Maximum power of frequency amplitude - {}'.format(name))
        df['max_power_freq_amp_' + name] = df[name].apply(_sqrt_peak_power)

        print(' Dominant frequency - {}'.format(name))
        df['dominant_freq_' + name] = df[name].apply(_central_frequency)

        print(' Waveform Correlation - {}'.format(name))
        _store_four(
            ['xcor_{}_{}'.format(name, stat) for stat in stat_names],
            df[name].apply(waveform_xc_properties))

    # Assemble trace_id and the model columns in their fixed output order.
    selected = ['trace_id']
    selected += ['spectral_centroid_' + name for name in traces]
    selected += [
        'rect_' + quantity + suffix
        for suffix in suffixes
        for quantity in rect_quantities
    ]
    for prefix in ('rms_freq_amp_', 'max_power_freq_amp_', 'dominant_freq_'):
        selected += [prefix + name for name in traces]
    selected += [
        'xcor_{}_{}'.format(name, stat)
        for name in traces
        for stat in stat_names
    ]
    selected += [
        '{}_env_sim_{}'.format(comp, stat)
        for comp in components
        for stat in stat_names
    ]
    return df[selected]