def decluster_day_parties(party_dir, tribe_dir, trig_int, min_chan, metric,
                          start, end):
    """
    Decluster day-long Party files, writing one output Party per tribe.

    Every ``*[0-9].tgz`` file in ``party_dir`` whose date field (the
    second-to-last underscore-separated field of its path) lies between
    ``start`` and ``end`` is read. For each tribe found in ``tribe_dir`` the
    party is reduced to that tribe's templates, filtered by ``min_chan``,
    declustered and written to a path derived from the input file name.
    Outputs already present in ``party_dir`` when the function starts are
    skipped.

    :param party_dir: Directory housing the Party files from match_filter
    :param tribe_dir: Directory housing the Tribe files. The text after the
        last underscore of each file name (up to the first dot) is used as
        the tribe label in messages and output names
    :param trig_int: Minimum separation dist between detections in secs
    :param min_chan: Minimum number of channels used in detection
    :param metric: 'avg_cor' or 'cor_sum'
    :param start: Start UTCDateTime for instance
    :param end: End UTCDateTime for instance.
    :return: None
    """
    import os
    from glob import glob
    from obspy import UTCDateTime
    from eqcorrscan.core.match_filter import Party, Tribe

    # Keep only the parties whose file-name date is inside the window
    # (one second of slack on either side).
    party_files = []
    for fname in glob('%s/*[0-9].tgz' % party_dir):
        file_date = UTCDateTime(fname.split('_')[-2])
        if file_date > start - 1 and file_date < end + 1:
            party_files.append(fname)
    party_files.sort()
    # Snapshot of what already exists, used to skip finished outputs.
    existing_files = set(glob('%s/*' % party_dir))
    print('Reading tribes to memory')
    tribes = [(Tribe().read(tribe_file),
               tribe_file.split('_')[-1].split('.')[0])
              for tribe_file in glob('%s/*' % tribe_dir)]
    print('Starting declustering')
    for party_file in party_files:
        strt = UTCDateTime()
        print('Processing party %s at %02d:%02d:%02d'
              % (party_file, strt.hour, strt.minute, strt.second))
        party = Party()
        party.read(party_file)
        # Strip only the extension: splitting on the first '.' would
        # truncate paths that contain dots elsewhere (e.g. './parties/...').
        party_root = os.path.splitext(party_file)[0]
        for tribe, tribe_name in tribes:
            outfile = '%s_min%02d_%s_%s_declust' % (party_root, min_chan,
                                                    metric, tribe_name)
            if '%s.tgz' % outfile in existing_files:
                print('Already wrote %s.tgz' % outfile)
                continue
            print('Working on tribe %s' % tribe_name)
            print('Original Party has length %d' % len(party))
            print('Partitioning into cluster: %s' % tribe_name)
            part_party = partition_party_by_tribe(party, tribe)
            print('Enforcing minimum no_chans')
            part_party.min_chans(min_chan)
            print('Declustering')
            try:
                part_party.decluster(trig_int=trig_int, metric=metric)
            except IndexError as msg:
                print('Declustering failed with %s\n' % msg)
                print('Probably no detections')
                continue
            print('Writing party to %s' % outfile)
            part_party.write(outfile)
    return
def lag_calc_daylong(wav_dirs, party, start, end, outdir, shift_len, min_cc,
                     cores=5, parallel=True, plot=False, debug=1):
    """
    Run lag_calc one day at a time over a date range.

    For every day yielded by ``date_generator`` the detections of that day
    are collected into a day-long Party, the day's waveforms are fetched with
    ``grab_day_wavs`` and the resulting catalog is written as QuakeML into
    ``outdir``.

    :param wav_dirs: List of waveform directory prefixes; the year of the
        day being processed is appended to each one
    :param party: eqcorrscan.core.match_filter.Party
    :param start: First day as a 'dd/mm/YYYY' string
    :param end: Last day as a 'dd/mm/YYYY' string
    :param outdir: Directory the daily QuakeML catalogs are written to
    :param shift_len: Passed to Party.lag_calc
    :param min_cc: Passed to Party.lag_calc
    :param cores: Passed to Party.lag_calc
    :param parallel: Passed to Party.lag_calc
    :param plot: Passed to Party.lag_calc
    :param debug: Passed to Party.lag_calc
    :return: None
    """
    import datetime
    from obspy import UTCDateTime
    from eqcorrscan.core.match_filter import Party, Family

    date_fmt = '%d/%m/%Y'
    first_day = datetime.datetime.strptime(start, date_fmt)
    last_day = datetime.datetime.strptime(end, date_fmt)
    for date in date_generator(first_day, last_day):
        # Build a party holding only this day's detections
        dto = UTCDateTime(date)
        day_party = Party(families=[
            Family(detections=[det for det in fam
                               if det.detect_time >= dto
                               and det.detect_time < dto + 86400],
                   template=fam.template)
            for fam in party])
        # One (initially empty) channel list per template station
        stachans = {}
        for family in day_party:
            for tr in family.template.st:
                stachans[tr.stats.station] = []
        for family in party:
            for tr in family.template.st:
                # Don't hard code vertical channels!!
                chan_code = 'EH' + tr.stats.channel[-1]
                sta_chans = stachans[tr.stats.station]
                if chan_code not in sta_chans:
                    sta_chans.append(chan_code)
        print('Reading waveforms')
        year_dirs = ['%s%d' % (wav_dir, dto.year) for wav_dir in wav_dirs]
        st = grab_day_wavs(wav_dirs=year_dirs, dto=dto, stachans=stachans)
        print('Running lag calc')
        day_cat = day_party.lag_calc(
            stream=st, pre_processed=False, shift_len=shift_len,
            min_cc=min_cc, cores=cores, debug=debug, plot=plot,
            parallel=parallel)
        out_name = '%s/det_cat_mcc%0.3f_shift%0.1f_%s.xml' % (
            outdir, min_cc, shift_len, dto.strftime('%Y-%m-%d'))
        day_cat.write(out_name, format='QUAKEML')
    return
def setUpClass(cls):
    """
    Build the synthetic fixtures shared by the tests.

    Generates synthetic templates and data, renames the synthetic channels,
    and assembles ``cls.party`` with one Family per template whose detections
    sit at the seeded sample times. The data are stored on ``cls.data``.
    """
    samp_rate = 50
    cls.t_length = .75
    # Make some synthetic templates
    templates, data, seeds = generate_synth_data(
        nsta=5, ntemplates=5, nseeds=10, samp_rate=samp_rate,
        t_length=cls.t_length, max_amp=10, max_lag=15, phaseout="both",
        jitter=0, noise=False, same_phase=True)
    # Rename channels
    channel_mapper = {"SYN_Z": "HHZ", "SYN_H": "HHN"}
    for tr in data:
        tr.stats.channel = channel_mapper[tr.stats.channel]
    for template in templates:
        for tr in template:
            tr.stats.channel = channel_mapper[tr.stats.channel]
    cls.party = Party()
    data_start = data[0].stats.starttime
    for t, (template, template_seeds) in enumerate(zip(templates, seeds)):
        template_name = "template_{0}".format(t)
        detections = []
        for i, sample in enumerate(template_seeds["time"]):
            # Seed times are in samples; convert to an absolute time
            det = Detection(
                template_name=template_name,
                detect_time=data_start + (sample / samp_rate),
                detect_val=template_seeds["SNR"][i] / len(data),
                no_chans=len(data),
                chans=[(tr.stats.station, tr.stats.channel) for tr in data],
                threshold=0.0, threshold_input=0.0, threshold_type="abs",
                typeofdet="ccc")
            det._calculate_event(template_st=template, estimate_origin=False)
            detections.append(det)
        # Make a fully formed Template
        _template = Template(
            name=template_name, st=template, lowcut=2.0, highcut=15.0,
            samp_rate=samp_rate, filt_order=4, process_length=86400,
            prepick=10. / samp_rate, event=None)
        cls.party += Family(template=_template, detections=detections)
    cls.data = data
def mseed_2_Party(wav_dir, cat, temp_cat, lowcut, highcut, filt_order,
                  process_length, prepick):
    """
    Take waveforms and catalog and create a Party object.

    Events in ``cat`` whose resource-id tail ends in ``_self`` are treated as
    templates; every other event whose resource-id tail starts with the same
    template id becomes a Detection of that template. Template waveforms are
    read from ``<wav_dir>/<resource-id tail>.mseed``; templates without a
    waveform file are skipped.

    :param wav_dir: Directory holding one miniSEED file per template event
    :param cat: Catalog holding both the template ('_self') events and the
        detection events
    :param temp_cat: Catalog searched for an event whose resource-id tail
        equals the template id; if one is found it is used as the template
        event instead of the '_self' event from ``cat``
    :param lowcut: Passed to Template
    :param highcut: Passed to Template
    :param filt_order: Passed to Template
    :param process_length: Passed to Template
    :param prepick: Passed to Template
    :return: Party with one Family per template that has a waveform file
    :raises IndexError: if a detection event lacks a 'det_time',
        'detect_val' or 'threshold' comment
    """
    def _rid_tail(ev):
        # Last '/'-separated component of the event's resource id
        return str(ev.resource_id).split('/')[-1]

    def _comment_value(ev, key):
        # Value of the first 'key=value' comment; IndexError if there is none
        return [com.text.split('=')[-1] for com in ev.comments
                if com.text.split('=')[0] == key][0]

    partay = Party()
    # Get templates first
    self_evs = [ev for ev in cat if _rid_tail(ev).split('_')[-1] == 'self']
    for temp_ev in self_evs:
        # Check the waveform per event so that a missing file only skips
        # that template instead of shifting every later waveform onto the
        # wrong event.
        wav_file = '%s/%s.mseed' % (wav_dir, _rid_tail(temp_ev))
        if not os.path.isfile(wav_file):
            continue
        temp_wav = read(wav_file)
        # Create a Template object, assign it to Family and then to Party
        tid = _rid_tail(temp_ev).split('_')[0]
        replacements = [ev for ev in temp_cat if _rid_tail(ev) == tid]
        if replacements:
            temp_ev = replacements[0]
        tmp = Template(name=tid, st=temp_wav, lowcut=lowcut, highcut=highcut,
                       samp_rate=temp_wav[0].stats.sampling_rate,
                       filt_order=filt_order, process_length=process_length,
                       prepick=prepick, event=temp_ev)
        fam_det_evs = [ev for ev in cat
                       if _rid_tail(ev).split('_')[-1] != 'self'
                       and _rid_tail(ev).split('_')[0] == tid]
        fam_dets = [
            Detection(template_name=_rid_tail(ev).split('_')[0],
                      detect_time=UTCDateTime(_comment_value(ev, 'det_time')),
                      no_chans=len(ev.picks),
                      chans=[pk.waveform_id.station_code for pk in ev.picks],
                      detect_val=float(_comment_value(ev, 'detect_val')),
                      threshold=float(_comment_value(ev, 'threshold')),
                      typeofdet='corr', threshold_type='MAD',
                      threshold_input=8.0, event=ev, id=_rid_tail(ev))
            for ev in fam_det_evs]
        fam_cat = Catalog(events=[det.event for det in fam_dets])
        fam = Family(template=tmp, detections=fam_dets, catalog=fam_cat)
        partay.families.append(fam)
    return partay
def combine_year_parties(party_dir):
    """
    Take declustered parties and combine them into one year-long party.

    For each of the hard-coded places, every file in ``party_dir`` matching
    ``*min05_avg_cor_<place>_declust*`` is read and summed into a single
    Party, which is then written back into ``party_dir`` as
    ``Party_<year>_min05_avg_cor_<place>_declust``.

    :param party_dir: Directory holding the declustered parties; its last
        path component is used as the year in the output name
    :return: None
    """
    from glob import glob
    from eqcorrscan.core.match_filter import Party

    # Ignore a trailing slash so that 'path/2015/' still yields '2015'
    year = party_dir.rstrip('/').split('/')[-1]
    for place in ['Rotokawa', 'Wairakei', 'North', 'South', 'Remainder']:
        out_root = '%s/Party_%s_min05_avg_cor_%s_declust' % (party_dir, year,
                                                             place)
        glob_str = '%s/*min05_avg_cor_%s_declust*' % (party_dir, place)
        # The yearly output matches the input pattern too; leave it out so
        # a re-run does not fold a previous result back into itself.
        # NOTE(review): assumes Party.write appends '.tgz' - confirm.
        previous_output = (out_root, '%s.tgz' % out_root)
        party_files = sorted(f for f in glob(glob_str)
                             if f not in previous_output)
        big_party = Party()
        for party_file in party_files:
            print('Adding %s to big_party' % party_file)
            big_party += Party().read(party_file)
        print('Writing yearlong file for %s' % place)
        big_party.write(out_root)
    return
def partition_party_by_tribe(party, tribe):
    """
    Return a new Party holding only the families that belong to a tribe.

    A family is kept when the name of its template equals the name of one
    of the templates in ``tribe``.

    :param party: Party to take families from
    :param tribe: Iterable of templates whose names select the families
    :return: New Party with the selected families
    """
    from eqcorrscan.core.match_filter import Party

    wanted_names = [template.name for template in tribe]
    subset = Party()
    for family in party:
        if family.template.name not in wanted_names:
            continue
        subset += family
    return subset
def get_catalog(daystr):
    """
    Return the borehole detections of one day that earlier runs did not find.

    Reads the hourly borehole detection catalogs for ``daystr`` from
    DETECT_DIR, then drops every event whose earliest pick lies within
    2 seconds of a detection stored in the party files of the previous
    EQcorrscan runs.

    :param daystr: Day string used in the detection and party file names
    :return: Catalog of events with no match in the previous runs
    """
    # Get borehole detections
    bh_pattern = os.path.join(DETECT_DIR,
                              "bhdetections_%s_hour*.xml" % (daystr))
    full_cat = Catalog()
    for path in sorted(glob(bh_pattern)):
        Logger.info("Reading detections from %s" % os.path.split(path)[1])
        full_cat += read_events(path)
    Logger.info("Total number of BH detections: %d" % len(full_cat))

    # Fetch other catalog files
    other_cat_files = glob("/home/gilbert_lab/cami_frs/eqcorrscan/templates_hawksOnly_f60/detections*/*%s*.tgz" % daystr)
    other_cat_files += glob("/home/gilbert_lab/cami_frs/eqcorrscan/templates_wBH_f60/detections*/*%s*.tgz" % daystr)

    # Look for new borehole detections only, not accounted for in previous
    # EQcorrscan runs
    matched_cat = Catalog()
    for path in other_cat_files:
        Logger.info("Looking for matching detections in previous EQcorrscan run in file:\n\t%s" % path)
        previous = Party().read(path)
        for family in previous:
            for detection in family.detections:
                for ev in full_cat:
                    earliest_pick = min([p.time for p in ev.picks])
                    if abs(earliest_pick - detection.detect_time) < 2.0:
                        matched_cat.append(ev)
    Logger.info("%d matched detections" % len(matched_cat))

    cat = Catalog()
    for ev in full_cat:
        if ev in matched_cat:
            continue
        cat += ev
    Logger.info("Total number of NEW BH detections: %d" % len(cat))

    # new_cat_wp = Catalog()
    # for ev in cat:
    #     if ev not in matched_cat and len([p for p in ev.picks if p.phase_hint == "P"]) > 0 and len(ev.magnitudes) > 0:
    #         new_cat_wp += ev
    # print(new_cat_wp)
    # print("Total number of NEW BH detections with P picks and magnitudes: %d" % len(new_cat_wp))
    return cat
def group(tribe):
    """
    Group the templates of a tribe by shared processing parameters.

    Templates are visited in order. A template that is not yet a member of
    any group starts a new group, which then collects every other template
    in the tribe for which ``same_processing`` returns True.

    :param tribe: Object with a ``templates`` sequence whose items provide
        ``same_processing(other)`` and support ``==`` / ``!=``
    :return: List of groups, each a list of templates with the group's
        first template leading. Empty list for an empty tribe.
    """
    template_groups = []
    for master in tribe.templates:
        # Already collected into an earlier group: nothing to do
        if any(master in members for members in template_groups):
            continue
        new_group = [master]
        new_group.extend(slave for slave in tribe.templates
                         if master.same_processing(slave)
                         and master != slave)
        template_groups.append(new_group)
    return template_groups
inst_dats = split_dates[instance] except IndexError: print('Instance no longer needed. Downsize --splits for this job') sys.exit() inst_start = min(inst_dats) inst_end = max(inst_dats) print('This instance will run from %s to %s' % (inst_start.strftime('%Y/%m/%d'), inst_end.strftime('%Y/%m/%d'))) else: inst_dats = all_dates # Reading tribe tribe = Tribe().read(tribe_file) party = Party() net_sta_loc_chans = list(set([(pk.waveform_id.network_code, pk.waveform_id.station_code, pk.waveform_id.location_code, pk.waveform_id.channel_code) for temp in tribe for pk in temp.event.picks])) for date in date_generator(inst_dats[0], inst_dats[-1]): dto = UTCDateTime(date) jday = dto.julday print('Running {}\nJday: {}'.format(dto, jday)) wav_files = ['{}/{}/{}/{}/{}/{}.{}.{}.{}.{}.{:03d}.ms'.format( wav_dir, date.year, nslc[0], nslc[1], nslc[3], nslc[0], nslc[1], nslc[2], nslc[3], date.year, jday) for nslc in net_sta_loc_chans] daylong = Stream() print('Reading wavs')
file = sys.argv[1] print(file) # Waveforms stream = read(file) detection_id = file.replace(".mseed", "") # Event info dum = file.split("_")[-1] family_file_root = file.replace("_%s" % dum, "") family_file = glob("family_%s_ndet*.tgz" % family_file_root) if not family_file: Logger.error("No family file found.") party = Party().read(family_file[0]) family = party[0] template = family.template event = template.event picks_auto = [] for p in event.picks: nscl = (p.waveform_id.network_code, p.waveform_id.station_code, p.waveform_id.location_code, p.waveform_id.channel_code) kind = 1 if p.phase_hint == "P" else 2 pick_time = str_to_time( p.time.strftime("%Y-%m-%d %H:%M:") + "%f" % (p.time.second + p.time.microsecond * 1e-6)) m = PhaseMarker(nslc_ids=[nscl], tmin=pick_time,
tribe_rd_strt = timer() # Reading tribe tribe = Tribe().read( '/projects/nesi00228/data/templates/12-15/Tribe_12-15_P_nodups.tgz') print('Reading Tribe tarball took %s seconds' % (timer() - tribe_rd_strt)) # Extract the station info from the templates stachans = {tr.stats.station: [] for temp in tribe for tr in temp.st} for temp in tribe: for tr in temp.st: # Don't hard code vertical channels!! chan_code = 'EH' + tr.stats.channel[-1] if chan_code not in stachans[tr.stats.station]: stachans[tr.stats.station].append(chan_code) # Specify locations of waveform files wav_dirs = ['/projects/nesi00228/data/miniseed/'] inst_partay = Party() for day in inst_dats: dto = UTCDateTime(day) wav_read_start = timer() wav_ds = ['%s%d' % (d, dto.year) for d in wav_dirs] st = grab_day_wavs(wav_ds, dto, stachans) st.merge(fill_value='interpolate') wav_read_stop = timer() print('Reading waveforms took %.3f seconds' % (wav_read_stop - wav_read_start)) print('Checking for trace length. Removing if too short') rm_trs = [] for tr in st: if len(tr.data) < (86400 * tr.stats.sampling_rate * 0.8): rm_trs.append(tr) if tr.stats.starttime != dto:
Logger.info("Finished loading catalog.") # Create tribe tribe = catalog_to_templates(catalog=cat) Logger.info("Finished tribe construction.") print(tribe) tribe_fname = os.path.join(OUTPUT_DIR, "tribe_init_day%s.tgz" % daystr) Logger.info("Saving initial tribe to: %s" % tribe_fname) tribe.write(filename=tribe_fname) # Detect: trig_int = 2.0 min_chans = 5 threshold = 0.7 threshold_type = "av_chan_corr" parties = Party() for hour in range(0, 24): Logger.info("Starting detection for hour %d" % hour) pattern = os.path.join(WF_DIR_ROOT, "G*", "*..DP*%s-%s-%s_%02d*2020*" % (year, month, day, hour)) if not glob(pattern): Logger.info("No data found for hour %d" % hour) continue st = read(pattern) st.resample(SAMPLING_RATE) st.detrend() party = tribe.detect(stream=st, threshold=threshold, threshold_type=threshold_type, trig_int=trig_int, plot=False, daylong=False, ignore_bad_data=True, parallel_proces=True, cores=cpu_count(), concurrency="multiprocess", group_size=20, overlap="calculate") party.min_chans(min_chans) #party.decluster(trig_int=trig_int)
def plot_non_cumulative(party, dates=False, tribe_list=False):
    """
    Recreating something similar to Gabe's thesis fig. 4.9 plotting a party.

    Each template gets its own horizontal row (templates sorted by name) on
    which its detection times are drawn. Rows are coloured by the tribe
    (multiplet) the template belongs to.

    :param party: Party whose detections are plotted
    :param dates: Optional two-item sequence (start, end); only detections
        strictly between the two times are plotted
    :param tribe_list: Optional list of tribes (iterables of templates with
        a ``name``). Tribes with more than one template get the next colour
        of the cycle, single-template tribes are 'grey'. Templates that are
        in none of the tribes are drawn 'grey' as well; a template present
        in several tribes takes the colour of the first one.
    :return: The matplotlib Axes holding the plot
    :raises IOError: if the party holds something other than Detection
        objects
    """
    if dates:
        # NOTE(review): families are rebuilt around blank templates; only
        # detection.template_name is used further down.
        date_party = Party()
        for fam in party:
            date_party += Family(
                detections=[det for det in fam.detections
                            if det.detect_time < dates[1]
                            and det.detect_time > dates[0]],
                template=Template())
        party = date_party
    if tribe_list is False or tribe_list is None:
        tribe_list = []
    # Make list of list of template names for each tribe
    mult_list = [[temp.name for temp in tribe] for tribe in tribe_list]
    # Setup generator for colors as in cumulative_detections()
    colors = cycle(['blue', 'green', 'red', 'cyan', 'magenta', 'black',
                    'purple', 'darkgoldenrod', 'gray'])
    # Make color dict with key as multiplet no
    col_dict = {i: (next(colors) if len(mult) > 1 else 'grey')
                for i, mult in enumerate(tribe_list)}
    # Exactly one colour per template name (first multiplet wins), so that
    # colours can never get out of step with the rows plotted below.
    name_to_col = {}
    for i, mult in enumerate(mult_list):
        for name in mult:
            name_to_col.setdefault(name, col_dict[i])
    # Collect detection times per template
    dates_by_template = {}
    for fam in party:
        for detection in fam.detections:
            if not isinstance(detection, Detection):
                msg = 'detection not of type: ' + \
                      'eqcorrscan.core.match_filter.Detection'
                raise IOError(msg)
            dates_by_template.setdefault(
                detection.template_name, []).append(
                detection.detect_time.datetime)
    fig, ax = plt.subplots()
    for i, temp_name in enumerate(sorted(dates_by_template)):
        d_list = sorted(dates_by_template[temp_name])
        # Constant y value: the row index of this template
        y = np.empty(len(d_list))
        y.fill(i)
        ax.plot(d_list, y, '--o', color=name_to_col.get(temp_name, 'grey'),
                linewidth=0.2,
                # markerfacecolor=colorsList[i - 1],
                markersize=3,
                markeredgewidth=0, markeredgecolor='k',
                label=temp_name)
    fig.autofmt_xdate()
    return ax