def analyze_data(families, staloc, nhour, t1, duration, dt, ncpu, icpu):
    """
    Analyze one chunk of the list of LFE families: cross-correlate the hourly
    data with the templates and store the LFE detection times in a catalog.

    Input:
        type families = pandas dataframe
        families = List of LFE families (columns 'family' and 'stations')
        type staloc = pandas dataframe
        staloc = Metadata of the stations (network, channels, location, server)
        type nhour = integer
        nhour = Number of hours of data to analyze
        type t1 = obspy UTCDateTime
        t1 = Time when we begin looking for LFEs
        type duration = float
        duration = Duration of the LFE templates
        type dt = float
        dt = Time step of the data
        type ncpu = integer
        ncpu = Number of CPUs used
        type icpu = integer
        icpu = Index of the CPU running this function
    Output:
        None
    Note: template_dir, freq0, type_threshold, and threshold are assumed to be
    defined at module level.
    """
    # Families analyzed by this CPU
    nfamilies = int(ceil(len(families) / ncpu))
    ibegin = icpu * nfamilies
    iend = min((icpu + 1) * nfamilies, len(families))

    for i in range(ibegin, iend):

        # Create directory to store the LFEs times
        namedir = 'LFEs/' + families['family'].iloc[i]
        if not os.path.exists(namedir):
            os.makedirs(namedir)

        # File to write error messages
        namedir = 'error'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        errorfile = 'error/' + families['family'].iloc[i] + '.txt'

        # Create dataframe to store LFE times
        df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
            'minute', 'second', 'cc', 'nchannel'])

        # Read the templates
        stations = families['stations'].iloc[i].split(',')
        templates = Stream()
        for station in stations:
            data = pickle.load(open(template_dir + '/' + \
                families['family'].iloc[i] + '/' + station + '.pkl', 'rb'))
            if (len(data) == 3):
                EW = data[0]
                NS = data[1]
                UD = data[2]
                EW.stats.station = station
                NS.stats.station = station
                EW.stats.channel = 'E'
                NS.stats.channel = 'N'
                templates.append(EW)
                templates.append(NS)
            else:
                UD = data[0]
            UD.stats.station = station
            UD.stats.channel = 'Z'
            templates.append(UD)

        # Loop on hours of data
        for hour in range(0, nhour):
            nchannel = 0
            Tstart = t1 + hour * 3600.0
            Tend = t1 + (hour + 1) * 3600.0 + duration
            delta = Tend - Tstart
            ndata = int(delta / dt) + 1

            # Get the data
            data = []
            for station in stations:
                try:
                    D = read('tmp/' + station + '.mseed')
                    D = D.slice(Tstart, Tend)
                    namefile = 'tmp/' + station + '.pkl'
                    orientation = pickle.load(open(namefile, 'rb'))

                    # Get station metadata for reading response file
                    for ir in range(0, len(staloc)):
                        if (station == staloc['station'][ir]):
                            network = staloc['network'][ir]
                            channels = staloc['channels'][ir]
                            location = staloc['location'][ir]
                            server = staloc['server'][ir]

                    # Orientation of template
                    # Date chosen: April 1st 2008
                    mychannels = channels.split(',')
                    mylocation = location
                    if (mylocation == '--'):
                        mylocation = ''
                    response = '../data/response/' + network + '_' + station + '.xml'
                    inventory = read_inventory(response, format='STATIONXML')
                    reference = []
                    for channel in mychannels:
                        angle = inventory.get_orientation(network + '.' + \
                            station + '.' + mylocation + '.' + channel, \
                            UTCDateTime(2008, 4, 1, 0, 0, 0))
                        reference.append(angle)

                    # Append data to stream
                    if (type(D) == obspy.core.stream.Stream):
                        stationdata = fill_data(D, orientation, station, channels, reference)
                        if (len(stationdata) > 0):
                            for stream in stationdata:
                                data.append(stream)
                except:
                    message = 'No data available for station {} '.format( \
                        station) + 'at time {}/{}/{} - {}:{}:{}\n'.format( \
                        Tstart.year, Tstart.month, Tstart.day, Tstart.hour, \
                        Tstart.minute, Tstart.second)

            # Loop on channels
            for channel in range(0, len(data)):
                subdata = data[channel]
                # Check whether we have a complete one-hour-long recording
                if (len(subdata) == 1):
                    if (len(subdata[0].data) == ndata):
                        # Get the template
                        station = subdata[0].stats.station
                        component = subdata[0].stats.channel
                        template = templates.select(station=station, \
                            component=component)[0]
                        # Cross correlation
                        cctemp = correlate.optimized(template, subdata[0])
                        if (nchannel > 0):
                            cc = np.vstack((cc, cctemp))
                        else:
                            cc = cctemp
                        nchannel = nchannel + 1

            if (nchannel > 0):
                # Compute average cross-correlation across channels
                meancc = np.mean(cc, axis=0)
                if (type_threshold == 'MAD'):
                    MAD = np.median(np.abs(meancc - np.mean(meancc)))
                    index = np.where(meancc >= threshold * MAD)
                elif (type_threshold == 'Threshold'):
                    index = np.where(meancc >= threshold)
                else:
                    raise ValueError('Type of threshold must be MAD or Threshold')
                times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

                # Get LFE times
                if np.shape(index)[1] > 0:
                    (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                    # Add LFE times to dataframe
                    i0 = len(df.index)
                    for j in range(0, len(time)):
                        timeLFE = Tstart + time[j]
                        df.loc[i0 + j] = [int(timeLFE.year), int(timeLFE.month), \
                            int(timeLFE.day), int(timeLFE.hour), \
                            int(timeLFE.minute), timeLFE.second + \
                            timeLFE.microsecond / 1000000.0, cc[j], nchannel]

        # Add to pandas dataframe and save
        namefile = 'LFEs/' + families['family'].iloc[i] + '/catalog.pkl'
        if os.path.exists(namefile):
            df_all = pickle.load(open(namefile, 'rb'))
            df_all = pd.concat([df_all, df], ignore_index=True)
        else:
            df_all = df
        df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
            'day':'int32', 'hour':'int32', 'minute':'int32', \
            'second':'float', 'cc':'float', 'nchannel':'int32'})
        pickle.dump(df_all, open(namefile, 'wb'))
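
# The ncpu/icpu arguments split the list of families into ncpu contiguous
# chunks; each call to analyze_data processes chunk number icpu. Below is a
# minimal sketch of how the chunks could be dispatched in parallel with the
# standard-library multiprocessing module. It is an illustration only (the
# helper name and the use of multiprocessing are assumptions, not part of the
# original workflow) and presumes families, staloc, nhour, t1, duration, and
# dt have been prepared as in the surrounding code.
def run_analyze_data_parallel(families, staloc, nhour, t1, duration, dt, ncpu):
    from multiprocessing import Process
    processes = []
    for icpu in range(ncpu):
        # One worker per chunk of families
        p = Process(target=analyze_data, \
            args=(families, staloc, nhour, t1, duration, dt, ncpu, icpu))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
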
def find_LFEs(filename, stations, tbegin, tend, TDUR, filt, \
    freq0, nattempts, waittime, draw=False, type_threshold='MAD', \
    threshold=0.0075):
    """
    Find LFEs with the temporary stations from FAME
    using the templates from Plourde et al. (2015)

    Input:
        type filename = string
        filename = Name of the template
        type stations = list of strings
        stations = Names of the stations used for the matched-filter algorithm
        type tbegin = tuple of 6 integers
        tbegin = Time when we begin looking for LFEs
        type tend = tuple of 6 integers
        tend = Time when we stop looking for LFEs
        type TDUR = float
        TDUR = Time to add before and after the time window for tapering
        type filt = tuple of floats
        filt = Lower and upper frequencies of the filter
        type freq0 = float
        freq0 = Maximum rate of LFE occurrence
        type nattempts = integer
        nattempts = Number of times we try to download data
        type waittime = positive float
        waittime = Time to wait between two attempts at downloading
        type draw = boolean
        draw = Do we draw a figure of the cross-correlation?
        type type_threshold = string
        type_threshold = 'MAD' or 'Threshold'
        type threshold = float
        threshold = Cross-correlation value (or number of MADs) above which
            an LFE is detected
    Output:
        None
    """
    # Get the network, channels, and location of the stations
    staloc = pd.read_csv('../data/Ducellier/stations_permanent.txt', \
        sep=r'\s{1,}', header=None, engine='python')
    staloc.columns = ['station', 'network', 'channels', 'location', \
        'server', 'latitude', 'longitude', 'time_on', 'time_off']

    # Create directory to store the LFEs times
    namedir = 'LFEs/' + filename
    if not os.path.exists(namedir):
        os.makedirs(namedir)

    # File to write error messages
    namedir = 'error'
    if not os.path.exists(namedir):
        os.makedirs(namedir)
    errorfile = 'error/' + filename + '.txt'

    # Read the templates
    templates = Stream()
    for station in stations:
        data = pickle.load(open('templates_new/' + filename + \
            '/' + station + '.pkl', 'rb'))
        if (len(data) == 3):
            EW = data[0]
            NS = data[1]
            UD = data[2]
            EW.stats.station = station
            NS.stats.station = station
            EW.stats.channel = 'E'
            NS.stats.channel = 'N'
            templates.append(EW)
            templates.append(NS)
        else:
            UD = data[0]
        UD.stats.station = station
        UD.stats.channel = 'Z'
        templates.append(UD)

    # Begin and end time of analysis
    t1 = UTCDateTime(year=tbegin[0], month=tbegin[1], \
        day=tbegin[2], hour=tbegin[3], minute=tbegin[4], \
        second=tbegin[5])
    t2 = UTCDateTime(year=tend[0], month=tend[1], \
        day=tend[2], hour=tend[3], minute=tend[4], \
        second=tend[5])

    # Read the data
    data = []
    for station in stations:
        # Get station metadata for downloading
        for ir in range(0, len(staloc)):
            if (station == staloc['station'][ir]):
                network = staloc['network'][ir]
                channels = staloc['channels'][ir]
                location = staloc['location'][ir]
                server = staloc['server'][ir]

        # Duration of template
        template = templates.select(station=station, component='Z')[0]
        dt = template.stats.delta
        nt = template.stats.npts
        duration = (nt - 1) * dt
        Tstart = t1 - TDUR
        Tend = t2 + duration + TDUR
        delta = t2 + duration - t1
        ndata = int(delta / dt) + 1

        # Orientation of template
        # Date chosen: January 1st 2012
        mychannels = channels.split(',')
        mylocation = location
        if (mylocation == '--'):
            mylocation = ''
        response = '../data/response/' + network + '_' + station + '.xml'
        inventory = read_inventory(response, format='STATIONXML')
        reference = []
        for channel in mychannels:
            angle = inventory.get_orientation(network + '.' + \
                station + '.' + mylocation + '.' + channel, \
                UTCDateTime(2012, 1, 1, 0, 0, 0))
            reference.append(angle)

        # First case: we can get the data from IRIS
        if (server == 'IRIS'):
            (D, orientation) = get_from_IRIS(station, network, channels, \
                location, Tstart, Tend, filt, dt, nattempts, waittime, \
                errorfile)
        # Second case: we get the data from NCEDC
        elif (server == 'NCEDC'):
            (D, orientation) = get_from_NCEDC(station, network, channels, \
                location, Tstart, Tend, filt, dt, nattempts, waittime, \
                errorfile)
        else:
            raise ValueError('You can only download data from IRIS and NCEDC')

        # Append data to stream
        if (type(D) == obspy.core.stream.Stream):
            stationdata = fill_data(D, orientation, station, channels, reference)
            if (len(stationdata) > 0):
                for stream in stationdata:
                    data.append(stream)

    # Number of hours of data to analyze
    nhour = int(ceil((t2 - t1) / 3600.0))

    # Create dataframe to store LFE times
    df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
        'minute', 'second', 'cc', 'nchannel'])

    # Loop on hours of data
    for hour in range(0, nhour):
        nchannel = 0
        Tstart = t1 + hour * 3600.0
        Tend = t1 + (hour + 1) * 3600.0 + duration
        delta = Tend - Tstart
        ndata = int(delta / dt) + 1

        # Loop on channels
        for channel in range(0, len(data)):
            # Cut the data
            subdata = data[channel]
            subdata = subdata.slice(Tstart, Tend)
            # Check whether we have a complete one-hour-long recording
            if (len(subdata) == 1):
                if (len(subdata[0].data) == ndata):
                    # Get the template
                    station = subdata[0].stats.station
                    component = subdata[0].stats.channel
                    template = templates.select(station=station, \
                        component=component)[0]
                    # Cross correlation
                    cctemp = correlate.optimized(template, subdata[0])
                    if (nchannel > 0):
                        cc = np.vstack((cc, cctemp))
                    else:
                        cc = cctemp
                    nchannel = nchannel + 1

        if (nchannel > 0):
            # Compute average cross-correlation across channels
            meancc = np.mean(cc, axis=0)
            if (type_threshold == 'MAD'):
                MAD = np.median(np.abs(meancc - np.mean(meancc)))
                index = np.where(meancc >= threshold * MAD)
            elif (type_threshold == 'Threshold'):
                index = np.where(meancc >= threshold)
            else:
                raise ValueError('Type of threshold must be MAD or Threshold')
            times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

            # Get LFE times
            if np.shape(index)[1] > 0:
                (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                # Add LFE times to dataframe
                i0 = len(df.index)
                for i in range(0, len(time)):
                    timeLFE = Tstart + time[i]
                    df.loc[i0 + i] = [int(timeLFE.year), int(timeLFE.month), \
                        int(timeLFE.day), int(timeLFE.hour), \
                        int(timeLFE.minute), timeLFE.second + \
                        timeLFE.microsecond / 1000000.0, cc[i], nchannel]

            # Draw figure
            if (draw == True):
                params = {'xtick.labelsize':16, 'ytick.labelsize':16}
                pylab.rcParams.update(params)
                plt.figure(1, figsize=(20, 8))
                if np.shape(index)[1] > 0:
                    for i in range(0, len(time)):
                        plt.axvline(time[i], linewidth=2, color='grey')
                plt.plot(np.arange(0.0, np.shape(meancc)[0] * dt, \
                    dt), meancc, color='black')
                if (type_threshold == 'MAD'):
                    plt.axhline(threshold * MAD, linewidth=2, color='red', \
                        label = '{:6.2f} * MAD'.format(threshold))
                elif (type_threshold == 'Threshold'):
                    plt.axhline(threshold, linewidth=2, color='red', \
                        label = 'Threshold = {:8.4f}'.format(threshold))
                else:
                    raise ValueError( \
                        'Type of threshold must be MAD or Threshold')
                plt.xlim(0.0, (np.shape(meancc)[0] - 1) * dt)
                plt.xlabel('Time (s)', fontsize=24)
                plt.ylabel('Cross-correlation', fontsize=24)
                plt.title('Average cross-correlation across stations', \
                    fontsize=30)
                plt.legend(loc=2, fontsize=24)
                plt.savefig('LFEs/' + filename + '/' + \
                    '{:04d}{:02d}{:02d}_{:02d}{:02d}{:02d}'.format( \
                    Tstart.year, Tstart.month, Tstart.day, Tstart.hour, \
                    Tstart.minute, Tstart.second) + '.png', format='png')
                plt.close(1)

    # Add to pandas dataframe and save
    namefile = 'LFEs/' + filename + '/catalog.pkl'
    if os.path.exists(namefile):
        df_all = pickle.load(open(namefile, 'rb'))
        df_all = pd.concat([df_all, df], ignore_index=True)
    else:
        df_all = df
    df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
        'day':'int32', 'hour':'int32', 'minute':'int32', \
        'second':'float', 'cc':'float', 'nchannel':'int32'})
    pickle.dump(df_all, open(namefile, 'wb'))
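
# Example call (a sketch only; the template name, station list, time window,
# and numerical values below are hypothetical placeholders chosen to match the
# argument types, not values from the original catalog):
#
#     find_LFEs(filename='family_001', stations=['STA1', 'STA2', 'STA3'], \
#         tbegin=(2008, 4, 1, 0, 0, 0), tend=(2008, 4, 2, 0, 0, 0), \
#         TDUR=10.0, filt=(1.5, 9.0), freq0=1.0, nattempts=10, \
#         waittime=10.0, draw=False, type_threshold='MAD', threshold=8.0)
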
def find_LFEs(family_file, station_file, template_dir, tbegin, tend, \
    TDUR, duration, filt, freq0, dt, nattempts, waittime, type_threshold='MAD', \
    threshold=0.0075):
    """
    Find LFEs with the temporary stations from FAME
    using the templates from Plourde et al. (2015)

    Input:
        type family_file = string
        family_file = File containing the list of LFE families
        type station_file = string
        station_file = File containing the list of stations
        type template_dir = string
        template_dir = Directory where to find the LFE templates
        type tbegin = tuple of 6 integers
        tbegin = Time when we begin looking for LFEs
        type tend = tuple of 6 integers
        tend = Time when we stop looking for LFEs
        type TDUR = float
        TDUR = Time to add before and after the time window for tapering
        type duration = float
        duration = Duration of the LFE templates
        type filt = tuple of floats
        filt = Lower and upper frequencies of the filter
        type freq0 = float
        freq0 = Maximum rate of LFE occurrence
        type dt = float
        dt = Time step for the LFE templates
        type nattempts = integer
        nattempts = Number of times we try to download data
        type waittime = positive float
        waittime = Time to wait between two attempts at downloading
        type type_threshold = string
        type_threshold = 'MAD' or 'Threshold'
        type threshold = float
        threshold = Cross-correlation value (or number of MADs) above which
            an LFE is detected
    Output:
        None
    """
    # Get the network, channels, and location of the stations
    staloc = pd.read_csv(station_file, \
        sep=r'\s{1,}', header=None, engine='python')
    staloc.columns = ['station', 'network', 'channels', 'location', \
        'server', 'latitude', 'longitude', 'time_on', 'time_off']

    # Begin and end time of analysis
    t1 = UTCDateTime(year=tbegin[0], month=tbegin[1], \
        day=tbegin[2], hour=tbegin[3], minute=tbegin[4], \
        second=tbegin[5])
    t2 = UTCDateTime(year=tend[0], month=tend[1], \
        day=tend[2], hour=tend[3], minute=tend[4], \
        second=tend[5])

    # Number of hours of data to analyze
    nhour = int(ceil((t2 - t1) / 3600.0))

    # Begin and end time of downloading
    Tstart = t1 - TDUR
    Tend = t2 + duration + TDUR

    # Temporary directory to store the data
    namedir = 'tmp'
    if not os.path.exists(namedir):
        os.makedirs(namedir)

    # Download the data from the stations
    for ir in range(0, len(staloc)):
        station = staloc['station'][ir]
        network = staloc['network'][ir]
        channels = staloc['channels'][ir]
        location = staloc['location'][ir]
        server = staloc['server'][ir]
        time_on = staloc['time_on'][ir]
        time_off = staloc['time_off'][ir]

        # File to write error messages
        namedir = 'error'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        errorfile = 'error/' + station + '.txt'

        # Check whether there are data for this period of time
        year_on = int(time_on[0:4])
        month_on = int(time_on[5:7])
        day_on = int(time_on[8:10])
        year_off = int(time_off[0:4])
        month_off = int(time_off[5:7])
        day_off = int(time_off[8:10])
        if ((Tstart > UTCDateTime(year=year_on, month=month_on, day=day_on)) \
            and (Tend < UTCDateTime(year=year_off, month=month_off, day=day_off))):

            # First case: we can get the data from IRIS
            if (server == 'IRIS'):
                (D, orientation) = get_from_IRIS(station, network, channels, \
                    location, Tstart, Tend, filt, dt, nattempts, waittime, \
                    errorfile, DATADIR)
            # Second case: we get the data from NCEDC
            elif (server == 'NCEDC'):
                (D, orientation) = get_from_NCEDC(station, network, channels, \
                    location, Tstart, Tend, filt, dt, nattempts, waittime, \
                    errorfile, DATADIR)
            else:
                raise ValueError('You can only download data from IRIS and NCEDC')

            # Store the data into temporary files
            if (type(D) == obspy.core.stream.Stream):
                D.write('tmp/' + station + '.mseed', format='MSEED')
                namefile = 'tmp/' + station + '.pkl'
                pickle.dump(orientation, open(namefile, 'wb'))

    # Loop on families
    families = pd.read_csv(family_file, \
        sep=r'\s{1,}', header=None, engine='python')
    families.columns = ['family', 'stations']
    for i in range(0, len(families)):

        # Create directory to store the LFEs times
        namedir = 'LFEs/' + families['family'].iloc[i]
        if not os.path.exists(namedir):
            os.makedirs(namedir)

        # File to write error messages
        namedir = 'error'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        errorfile = 'error/' + families['family'].iloc[i] + '.txt'

        # Create dataframe to store LFE times
        df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
            'minute', 'second', 'cc', 'nchannel'])

        # Read the templates
        stations = families['stations'].iloc[i].split(',')
        templates = Stream()
        for station in stations:
            templatefile = template_dir + '/' + \
                families['family'].iloc[i] + '/' + station + '.pkl'
            with open(templatefile, 'rb') as f:
                data = pickle.load(f)
            if (len(data) == 3):
                EW = data[0]
                NS = data[1]
                UD = data[2]
                EW.stats.station = station
                NS.stats.station = station
                EW.stats.channel = 'E'
                NS.stats.channel = 'N'
                templates.append(EW)
                templates.append(NS)
            else:
                UD = data[0]
            UD.stats.station = station
            UD.stats.channel = 'Z'
            templates.append(UD)

        # Loop on hours of data
        for hour in range(0, nhour):
            nchannel = 0
            Tstart = t1 + hour * 3600.0
            Tend = t1 + (hour + 1) * 3600.0 + duration
            delta = Tend - Tstart
            ndata = int(delta / dt) + 1

            # Get the data
            data = []
            for station in stations:
                try:
                    D = read('tmp/' + station + '.mseed')
                    D = D.slice(Tstart, Tend)
                    namefile = 'tmp/' + station + '.pkl'
                    orientation = pickle.load(open(namefile, 'rb'))

                    # Get station metadata for reading response file
                    for ir in range(0, len(staloc)):
                        if (station == staloc['station'][ir]):
                            network = staloc['network'][ir]
                            channels = staloc['channels'][ir]
                            location = staloc['location'][ir]
                            server = staloc['server'][ir]

                    # Orientation of template
                    # Date chosen: January 1st 2020
                    mychannels = channels.split(',')
                    mylocation = location
                    if (mylocation == '--'):
                        mylocation = ''
                    response = os.path.join(DATADIR, 'response/') + \
                        network + '_' + station + '.xml'
                    inventory = read_inventory(response, format='STATIONXML')
                    reference = []
                    for channel in mychannels:
                        angle = inventory.get_orientation(network + '.' + \
                            station + '.' + mylocation + '.' + channel, \
                            UTCDateTime(2020, 1, 1, 0, 0, 0))
                        reference.append(angle)

                    # Append data to stream
                    if (type(D) == obspy.core.stream.Stream):
                        stationdata = fill_data(D, orientation, station, channels, reference)
                        if (len(stationdata) > 0):
                            for stream in stationdata:
                                data.append(stream)
                except:
                    message = 'No data available for station {} '.format( \
                        station) + 'at time {}/{}/{} - {}:{}:{}\n'.format( \
                        Tstart.year, Tstart.month, Tstart.day, Tstart.hour, \
                        Tstart.minute, Tstart.second)

            # Loop on channels
            for channel in range(0, len(data)):
                subdata = data[channel]
                # Check whether we have a complete one-hour-long recording
                if (len(subdata) == 1):
                    if (len(subdata[0].data) == ndata):
                        # Get the template
                        station = subdata[0].stats.station
                        component = subdata[0].stats.channel
                        template = templates.select(station=station, \
                            component=component)[0]
                        # Cross correlation
                        cctemp = correlate.optimized(template, subdata[0])
                        if (nchannel > 0):
                            cc = np.vstack((cc, cctemp))
                        else:
                            cc = cctemp
                        nchannel = nchannel + 1

            if (nchannel > 0):
                # Compute average cross-correlation across channels
                meancc = np.mean(cc, axis=0)
                if (type_threshold == 'MAD'):
                    MAD = np.median(np.abs(meancc - np.mean(meancc)))
                    index = np.where(meancc >= threshold * MAD)
                elif (type_threshold == 'Threshold'):
                    index = np.where(meancc >= threshold)
                else:
                    raise ValueError('Type of threshold must be MAD or Threshold')
                times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

                # Get LFE times
                if np.shape(index)[1] > 0:
                    (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                    # Add LFE times to dataframe
                    i0 = len(df.index)
                    for j in range(0, len(time)):
                        timeLFE = Tstart + time[j]
                        df.loc[i0 + j] = [int(timeLFE.year), int(timeLFE.month), \
                            int(timeLFE.day), int(timeLFE.hour), \
                            int(timeLFE.minute), timeLFE.second + \
                            timeLFE.microsecond / 1000000.0, cc[j], nchannel]

        # Add to pandas dataframe and save
        df_all = df
        df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
            'day':'int32', 'hour':'int32', 'minute':'int32', \
            'second':'float', 'cc':'float', 'nchannel':'int32'})
        df_all.to_csv('LFEs/' + families['family'].iloc[i] + '/catalog_' + \
            '{:04d}{:02d}{:02d}_{:02d}{:02d}{:02d}'.format(tbegin[0], \
            tbegin[1], tbegin[2], tbegin[3], tbegin[4], tbegin[5]) + '.csv')
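
# Example call for this variant, which reads the family and station lists
# from files, downloads the data once into tmp/, and writes one CSV catalog
# per family and per analysis window. The file names and numerical values
# below are hypothetical placeholders, and DATADIR is assumed to be defined
# at module level:
#
#     find_LFEs(family_file='families.txt', station_file='stations.txt', \
#         template_dir='templates', tbegin=(2020, 1, 1, 0, 0, 0), \
#         tend=(2020, 1, 2, 0, 0, 0), TDUR=10.0, duration=60.0, \
#         filt=(1.5, 9.0), freq0=1.0, dt=0.05, nattempts=10, waittime=10.0, \
#         type_threshold='MAD', threshold=8.0)
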
def analyze_data(families, staloc, tbegin, tend, \
    freq0, type_threshold, threshold, ncpu, icpu):
    """
    Analyze one chunk of the list of LFE families: cross-correlate the hourly
    data with the templates, rotating horizontal components when their
    orientation differs from the template orientation, and store the LFE
    detection times in a catalog.

    Input:
        type families = pandas dataframe
        families = List of LFE families (columns 'family', 'stations', 'duration')
        type staloc = pandas dataframe
        staloc = Metadata of the stations (columns include 'station', 'channels', 'dt')
        type tbegin = tuple of 6 integers
        tbegin = Time when we begin looking for LFEs
        type tend = tuple of 6 integers
        tend = Time when we stop looking for LFEs
        type freq0 = float
        freq0 = Maximum rate of LFE occurrence
        type type_threshold = string
        type_threshold = 'MAD' or 'Threshold'
        type threshold = float
        threshold = Cross-correlation value (or number of MADs) above which
            an LFE is detected
        type ncpu = integer
        ncpu = Number of CPUs used
        type icpu = integer
        icpu = Index of the CPU running this function
    Output:
        None
    Note: template_dir is assumed to be defined at module level.
    """
    # Families analyzed by this CPU
    nfamilies = int(ceil(len(families) / ncpu))
    ibegin = icpu * nfamilies
    iend = min((icpu + 1) * nfamilies, len(families))

    # Loop on families
    for i in range(ibegin, iend):

        # Create directory to store the LFEs times
        namedir = 'LFEs/' + families['family'].iloc[i]
        if not os.path.exists(namedir):
            os.makedirs(namedir)

        # File to write number of stations
        namedir = 'nstations'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        stationfile = 'nstations/' + families['family'].iloc[i] + '.txt'

        # Create dataframe to store LFE times
        df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
            'minute', 'second', 'cc', 'nchannel'])

        # Read the templates
        stations = families['stations'].iloc[i].split(',')
        templates = Stream()
        orientations = []
        names = []
        for station in stations:
            subset = staloc.loc[staloc['station'] == station]
            channels = subset['channels'].iloc[0]
            mychannels = channels.split(',')
            for channel in mychannels:
                data = pickle.load(open(template_dir + '/' + \
                    families['family'].iloc[i] + '/' + station + '_' + \
                    channel + '.pkl', 'rb'))
                template = data[0]
                angle = data[1]
                templates.append(template)
                orientations.append(angle)
                names.append(station + '_' + channel)

        # Check the time step of the stations
        subset = staloc.loc[staloc['station'].isin(stations)]
        if len(subset['dt'].value_counts()) == 1:
            dt = subset['dt'].iloc[0]
        else:
            raise ValueError('All stations must have the same time step')

        # Number of hours of data to analyze
        t1 = UTCDateTime(year=tbegin[0], month=tbegin[1], \
            day=tbegin[2], hour=tbegin[3], minute=tbegin[4], \
            second=tbegin[5])
        t2 = UTCDateTime(year=tend[0], month=tend[1], \
            day=tend[2], hour=tend[3], minute=tend[4], \
            second=tend[5])
        nhour = int(ceil((t2 - t1) / 3600.0))
        duration = families['duration'].iloc[i]

        # To rotate components
        swap = {'E': 'N', 'N': 'E', '1': '2', '2': '1'}

        # Loop on hours of data
        for hour in range(0, nhour):
            Tstart = t1 + hour * 3600.0
            Tend = t1 + (hour + 1) * 3600.0 + duration
            delta = Tend - Tstart
            ndata = int(delta / dt) + 1

            # Get the data
            data = []
            for station in stations:
                subset = staloc.loc[staloc['station'] == station]
                channels = subset['channels'].iloc[0]
                mychannels = channels.split(',')
                for num, channel in enumerate(mychannels):
                    try:
                        D = read('tmp/' + station + '_' + channel + '.mseed')
                        D = D.slice(Tstart, Tend)
                        if (type(D) == obspy.core.stream.Stream):
                            namefile = 'tmp/' + station + '_' + channel + \
                                '.pkl'
                            orientation = pickle.load(open(namefile, 'rb'))[num]
                            index = names.index(station + '_' + channel)
                            reference = orientations[index]
                            # Rotate components
                            if (len(mychannels) > 1) and (num < 2):
                                if orientation != reference:
                                    channel_new = channel[0:2] + \
                                        swap[channel[2]]
                                    D_new = read('tmp/' + station + '_' + \
                                        channel_new + '.mseed')
                                    D_new = D_new.slice(Tstart, Tend)
                                    namefile = 'tmp/' + station + '_' + \
                                        channel_new + '.pkl'
                                    if num == 0:
                                        orientation_new = pickle.load(open( \
                                            namefile, 'rb'))[1]
                                    else:
                                        orientation_new = pickle.load(open( \
                                            namefile, 'rb'))[0]
                                    index = names.index(station + '_' + \
                                        channel_new)
                                    reference_new = orientations[index]
                                    if channel[2] in ['E', '1']:
                                        D = rotate_data(D, D_new, \
                                            orientation, orientation_new, \
                                            reference, reference_new, 'E')
                                    else:
                                        D = rotate_data(D_new, D, \
                                            orientation_new, orientation, \
                                            reference_new, reference, 'N')
                            # Append stream to data
                            data.append(D)
                    except:
                        message = 'No data available for station {}'.format( \
                            station) + ' and channel {}'.format(channel) + \
                            ' at time {}/{}/{} - {}:{}:{}\n'.format( \
                            Tstart.year, Tstart.month, Tstart.day, \
                            Tstart.hour, Tstart.minute, Tstart.second)

            # Loop on channels
            nchannel = 0
            for j in range(0, len(data)):
                subdata = data[j]
                # Check whether we have a complete one-hour-long recording
                if (len(subdata) == 1):
                    if (len(subdata[0].data) == ndata):
                        # Get the template
                        station = subdata[0].stats.station
                        channel = subdata[0].stats.channel
                        template = templates.select(station=station, \
                            channel=channel)[0]
                        # Cross correlation
                        cctemp = correlate.optimized(template, subdata[0])
                        if (nchannel > 0):
                            cc = np.vstack((cc, cctemp))
                        else:
                            cc = cctemp
                        nchannel = nchannel + 1

            # Write number of channels
            with open(stationfile, 'a') as file:
                file.write('{} {} {} {} {}\n'.format(Tstart.year, \
                    Tstart.month, Tstart.day, Tstart.hour, nchannel))

            if (nchannel > 0):
                # Compute average cross-correlation across channels
                if len(np.shape(cc)) == 1:
                    meancc = cc
                else:
                    meancc = np.mean(cc, axis=0)
                if (type_threshold == 'MAD'):
                    MAD = np.median(np.abs(meancc - np.mean(meancc)))
                    index = np.where(meancc >= threshold * MAD)
                elif (type_threshold == 'Threshold'):
                    index = np.where(meancc >= threshold)
                else:
                    raise ValueError( \
                        'Type of threshold must be MAD or Threshold')
                times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

                # Get LFE times
                if np.shape(index)[1] > 0:
                    (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                    # Add LFE times to dataframe
                    i0 = len(df.index)
                    for j in range(0, len(time)):
                        timeLFE = Tstart + time[j]
                        df.loc[i0 + j] = [int(timeLFE.year), \
                            int(timeLFE.month), int(timeLFE.day), \
                            int(timeLFE.hour), int(timeLFE.minute), \
                            timeLFE.second + timeLFE.microsecond / 1000000.0, \
                            cc[j], nchannel]

        # Add to pandas dataframe and save
        namefile = 'LFEs/' + families['family'].iloc[i] + '/catalog.pkl'
        if os.path.exists(namefile):
            df_all = pickle.load(open(namefile, 'rb'))
            df_all = pd.concat([df_all, df], ignore_index=True)
        else:
            df_all = df
        df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
            'day':'int32', 'hour':'int32', 'minute':'int32', \
            'second':'float', 'cc':'float', 'nchannel':'int32'})
        pickle.dump(df_all, open(namefile, 'wb'))
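
# In all of the functions above, the 'MAD' option declares a detection
# wherever the channel-averaged cross-correlation exceeds threshold times the
# median absolute deviation (about the mean) of that hour of data. The helper
# below is a minimal, self-contained restatement of that step for illustration
# only; the function name is an assumption, not part of the original code.
import numpy as np

def mad_detection_times(meancc, threshold, dt):
    # Median absolute deviation about the mean of the cross-correlation trace
    MAD = np.median(np.abs(meancc - np.mean(meancc)))
    # Samples exceeding threshold * MAD, converted to times in seconds
    index = np.where(meancc >= threshold * MAD)
    times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)
    return times[index]
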