def main():
    """Build the event and noise sample sets described on the command line.

    Parses the command-line options, indexes the stations of the inventory
    file by ``(network code, station code)``, reads the event catalog, logs
    how many events and stations were found, creates the output directory
    tree and finally generates the event samples followed by the noise
    samples.
    """
    # (flags, keyword arguments) for every command-line option, in the order
    # in which they are registered with the parser.
    option_table = [
        (('--inventory',),
         dict(type=str,
              help='File containing a FDSNStationXML list of stations to retrieve')),
        (('--events',),
         dict(type=str,
              help='Path to the directory that contains the events')),
        (('--output',),
         dict(type=str,
              help='Path to the output directory')),
        (('--event_fraction',),
         dict(type=float,
              help='Fraction of events to use')),
        (('--extra_samples',),
         dict(type=int, default=0,
              help='Number of extra samples to generate for each event')),
        (('--save_mseed',),
         dict(action='store_true',
              help='Whether to save a miniSEED backup of the streams')),
        (('--use-fdsn',),
         dict(action='store_true',
              help='Try to fetch streams from FDSN if they are not available locally')),
    ]
    parser = argparse.ArgumentParser()
    for flags, options in option_table:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    client = fdsn.Client('KNMI')

    # Index the station metadata by (network code, station code)
    inventory = read_inventory(args.inventory)
    station_dict = {
        (net.code, sta.code): sta
        for net in inventory.networks
        for sta in net.stations
    }

    # Read event catalog
    catalog, all_events = read_catalog_file(args.event_fraction, station_dict)

    # Count events per station; the totals are derived from this list
    events_per_station = []
    for network_code, station_code in catalog:
        station_events = len(catalog[(network_code, station_code)])
        logger.debug('%s %s --- %d events', network_code, station_code, station_events)
        events_per_station.append(station_events)
    logger.info('Events: %d / %d Stations: %d',
                sum(events_per_station), len(all_events), len(events_per_station))

    # Create <output>/<version>/<event type>/ for every combination if needed
    for version in ('mseed', 'raw', 'filter'):
        for event_type in range(4):
            target_dir = os.path.join(args.output, version, str(event_type), '')
            if os.path.exists(target_dir):
                continue
            os.makedirs(target_dir)

    # Get events
    num_event_samples = get_event_samples(
        client, catalog, inventory, station_dict, args.output,
        augment_copies=args.extra_samples,
        save_mseed=args.save_mseed,
        try_fdsn=args.use_fdsn)
    logger.info('Saved %d event samples', num_event_samples)

    # Get noise: as many noise samples as event samples including the extras
    noise_sample_count = num_event_samples * (1 + args.extra_samples)
    get_noise_samples_single(client, all_events, inventory, station_dict,
                             noise_sample_count, args.output)
def main(args):
    """Download one QuakeML event catalogue per station of the channel list.

    The inventory in ``args.channel_file`` is restricted to the vertical
    channel (``args.channel_prefix + 'Z'``) at ``args.sampling_rate``.  For
    every station, events around the station are requested from the FDSN
    service at ``args.base_url`` for the overlap of the station epoch with
    ``[args.starttime, args.endtime]`` and written to
    ``<event_files_path>/<net>_<sta>_<filename_base>.xml``.

    :param args: parsed options; the attributes read here are
        ``event_files_path``, ``channel_file``, ``channel_prefix``,
        ``sampling_rate``, ``minradius``, ``maxradius``, ``mindepth``,
        ``maxdepth``, ``minmagnitude``, ``maxmagnitude``, ``starttime``,
        ``endtime`` and ``base_url``.

    Stations whose request fails are reported and skipped (best effort).
    """
    dirname = args.event_files_path
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    # read list of channels to use
    inventory = read_inventory(args.channel_file)
    inventory = inventory.select(channel=args.channel_prefix + 'Z',
                                 sampling_rate=args.sampling_rate)

    # Depth part of the file name.
    # NOTE(review): when mindepth is given, maxdepth is not encoded in the
    # name; this naming is kept unchanged so existing files keep matching.
    if args.mindepth is not None:
        depth_str = str(args.mindepth) + 'km_'
    elif args.maxdepth is not None:
        depth_str = str(-999.9) + '-' + str(args.maxdepth) + 'km_'
    else:
        depth_str = ''

    minradius_str = str(args.minradius) if args.minradius is not None else str(0.0)
    maxmagnitude_str = str(args.maxmagnitude) if args.maxmagnitude is not None else ''
    filename_base = (minradius_str + '-' + str(args.maxradius) + 'deg_'
                     + depth_str
                     + 'M' + str(args.minmagnitude) + '-' + maxmagnitude_str)
    print('filename_base {}'.format(filename_base))

    events_starttime = UTCDateTime(args.starttime)
    events_endtime = UTCDateTime(args.endtime)
    print('events_starttime {}'.format(events_starttime))
    print('events_endtime {}'.format(events_endtime))

    # One client serves every station; creating it per station repeats the
    # client set-up for no benefit.
    client = fdsn.Client(args.base_url)

    for net in inventory:
        for sta in net:
            outfile = os.path.join(
                args.event_files_path,
                net.code + '_' + sta.code + '_' + filename_base + '.xml')
            print('net_sta {}'.format(net.code + '_' + sta.code))
            print('sta.start_date {}'.format(sta.start_date))
            print('sta.end_date {}'.format(sta.end_date))

            # Clip the requested period to the station epoch.  A missing
            # (None) epoch bound is treated as open-ended.
            tstart = events_starttime
            if sta.start_date is not None and sta.start_date > events_starttime:
                tstart = sta.start_date
            tend = events_endtime
            if sta.end_date is not None and sta.end_date < events_endtime:
                tend = sta.end_date
            print('tstart {}'.format(tstart))
            print('tend {}'.format(tend))
            if not tstart < tend:
                continue

            try:
                catalog = client.get_events(
                    latitude=sta.latitude, longitude=sta.longitude,
                    starttime=tstart, endtime=tend,
                    minradius=args.minradius, maxradius=args.maxradius,
                    mindepth=args.mindepth, maxdepth=args.maxdepth,
                    minmagnitude=args.minmagnitude, maxmagnitude=args.maxmagnitude,
                    includeallorigins=False, includeallmagnitudes=False,
                    includearrivals=False)
            except Exception as ex:
                # Deliberate best effort: any failure for one station
                # (including "no data") must not abort the other stations.
                print('Skipping net: {} sta: {} Exception: {}'.format(
                    net.code, sta.code, ex))
                continue

            catalog.write(outfile, 'QUAKEML')
            print('{} events: written to: {}'.format(catalog.count(), outfile))
def run(self):
    """Request an event catalog and publish the outcome via ``data_received``.

    The request parameters are taken from ``self._args`` and sent to the
    FDSN service at ``self.url``.  On an ``FDSNException`` the problem is
    logged (as info when the message contains 'No data available', as an
    error otherwise) and ``None`` is emitted.  On success a dict with the
    keys 'importer' and 'time_range' is emitted.
    """
    client = fdsn.Client(self.url)
    args = self._args

    try:
        catalog = client.get_events(**args)
    except fdsn.header.FDSNException as e:
        reason = str(e)
        if 'No data available' in reason:
            message = 'No data available between {} and {}'.format(
                self._args['starttime'], self._args['endtime'])
            self._logger.info(message)
        else:
            self._logger.error('FDSNException: ' + reason)
        self.data_received.emit(None)
        return

    # Success path: wrap the catalog and report the requested time range
    importer = ObsPyCatalogImporter(catalog)
    time_range = [args.get(bound) for bound in ['starttime', 'endtime']]
    self.data_received.emit({'importer': importer, 'time_range': time_range})
def get_inventory(conf):
    """Return the station inventory, reading it from disk when available.

    If the file named by ``conf["inventoryfile"]`` exists it is read and
    returned.  Otherwise the stations inside ``conf['region']`` for the
    period ``conf['starttime']`` to ``conf['endtime']`` are requested from
    IRIS at channel level, reduced to channels matching ``[BH]H?``, written
    to ``conf["inventoryfile"]`` as StationXML, logged and returned.

    Delete the inventory file after changing relevant parameters in the
    config file (such as region or channel matching); otherwise the stale
    file is reused.
    """
    inventory_path = conf["inventoryfile"]
    if os.path.exists(inventory_path):
        return read_inventory(inventory_path)

    # No cached inventory: query IRIS for the configured bounding box
    region = conf['region']
    bounds = {
        key: region[key]
        for key in ('minlatitude', 'maxlatitude', 'minlongitude', 'maxlongitude')
    }
    full_inventory = fdsn.Client("IRIS").get_stations(
        level='channel',
        starttime=conf['starttime'],
        endtime=conf['endtime'],
        **bounds)

    # Select the channels from the temporary inventory
    selected = full_inventory.select(channel="[BH]H?")
    selected.write(inventory_path, format='STATIONXML')
    logging.info(str(selected))
    return selected
network = 'CC' if len(sys.argv) > 1: day = UTCDateTime(sys.argv[1]) else: day = UTCDateTime('2017-022T00:00:00.0') secperday = 24 * 60 * 60. datadir = "data/" respdir = "resp/" qcfigs = "qcfigs/" qcdata = "qcdata/" path_verify(qcdata) path_verify(datadir) path_verify(qcfigs) path_verify(respdir) client = fdsn.Client("IRIS", timeout=240) for station in stations: # Look for channel files that already exist files_exist = False files = glob.glob("%s%d/%03d/%s.%s*.seed" % (datadir, day.year, day.julday, network, station)) st = Stream() # Read files from disk if they exist if len(files) > 0: files_exist = True for file in files: st += read(file) else: try: st = client.get_waveforms(network, station, "*", "*", day,
def build_tt_tables(minlat=None, maxlat=None, minlon=None, maxlon=None,
                    channel_codes=('EH', 'BH', 'HH'), db=None, maxdist=500.,
                    source_depth=5.):
    """Fill the 1D travel-time database with stations and a lookup table.

    Stations inside the bounding box are fetched from IRIS at channel level;
    one ``Station1D`` row is stored per station and location code that has at
    least one channel whose code matches an entry of ``channel_codes``.  Then
    a ``TTtable1D`` row (first P time, first S time, S-P time, computed with
    the IASP91 model) is stored for every distance from 0 to ``maxdist`` in
    1 km steps.

    :param minlat, maxlat, minlon, maxlon: bounding box passed to
        ``get_stations``.
    :param channel_codes: channels are selected when ``re.match`` of one of
        these codes succeeds against the channel code, i.e. the code starts
        with it.
    :param db: database URL handed to ``create_engine``.
    :param maxdist: maximum distance of the lookup table in km.
    :param source_depth: source depth in km; generally set to the average
        earthquake depth of the region.  For more granularity use the 3D
        associator.
    :return: the ObsPy inventory that was fetched.
    :raises ValueError: if the velocity model yields no P or no S arrival
        for one of the distances.
    """
    # Create a connection to an sqlalchemy database
    tt_engine = create_engine(db, echo=False)
    tt_stations_1D.BaseTT1D.metadata.create_all(tt_engine)
    TTSession = sessionmaker(bind=tt_engine)
    tt_session = TTSession()

    try:
        # Create a client to IRIS FDSN
        fdsnclient = fdsn.Client("IRIS")

        # Create an obspy inventory of stations
        # http://docs.obspy.org/packages/autogen/obspy.clients.fdsn.client.Client.get_stations.html#obspy.clients.fdsn.client.Client.get_stations
        inv = fdsnclient.get_stations(minlatitude=minlat, maxlatitude=maxlat,
                                      minlongitude=minlon, maxlongitude=maxlon,
                                      level='channel')

        # Plot our results just for fun
        inv.plot(projection='ortho', color_per_network=True)

        # Now save these stations into the 1D travel-time table database.
        # The associator could be modified to interact with Obspy Inventory
        # objects.
        for net in inv:
            network = net.code
            for sta in net:
                # Location codes with at least one selected channel, in
                # order of first appearance
                loccodes = []
                for ch in sta:
                    if ch.location_code in loccodes:
                        continue
                    if any(re.match(cc, ch.code) for cc in channel_codes):
                        loccodes.append(ch.location_code)
                for loc in loccodes:
                    station = tt_stations_1D.Station1D(
                        sta.code, network, loc,
                        sta.latitude, sta.longitude, sta.elevation)
                    tt_session.add(station)
        # One commit for all stations instead of one per row
        tt_session.commit()

        # Now we have to build our traveltime lookup tables.
        # We will use IASP91 here but obspy.taup does let you build your
        # own model.
        velmod = taup.TauPyModel(model='iasp91')

        # Spacing of the travel-time calculations in km.  Probably better to
        # use a progressive scheme instead of a linear one, but this is an
        # example.
        delta_distance = 1.
        distance_km = np.arange(0, maxdist + delta_distance, delta_distance)
        for d_km in distance_km:
            d_deg = geodetics.kilometer2degrees(d_km)
            p_arrivals = velmod.get_travel_times(
                source_depth_in_km=source_depth, distance_in_degree=d_deg,
                phase_list=['P', 'p'])
            s_arrivals = velmod.get_travel_times(
                source_depth_in_km=source_depth, distance_in_degree=d_deg,
                phase_list=['S', 's'])
            ptimes = [p.time for p in p_arrivals]
            stimes = [s.time for s in s_arrivals]
            if not ptimes or not stimes:
                # np.min would fail on an empty list with an opaque message
                raise ValueError(
                    'No P or S arrival at %s km for source depth %s km'
                    % (d_km, source_depth))
            first_p = np.min(ptimes)
            first_s = np.min(stimes)
            tt_entry = tt_stations_1D.TTtable1D(d_km, d_deg, first_p, first_s,
                                                first_s - first_p)
            tt_session.add(tt_entry)
        # One commit for the whole table instead of one per distance
        tt_session.commit()
    finally:
        # Release the session even when a request or a commit fails
        tt_session.close()

    return inv
""" from obspy.clients import fdsn from obspy import UTCDateTime from mt_metadata.timeseries.stationxml import XMLInventoryMTExperiment from mth5.mth5 import MTH5 from mth5.timeseries import RunTS network = "ZU" station = "CAS04" start = UTCDateTime("2020-06-02T18:41:43.000000Z") end = UTCDateTime("2020-07-13T21:46:12.000000Z") # need to know network, station, start and end times before hand client = fdsn.Client("IRIS") # get the data streams = client.get_waveforms(network, station, None, None, start, end) # get the metadata inventory = client.get_stations( start, end, network=network, station=station, level="channel" ) # translate obspy.core.Inventory to an mt_metadata.timeseries.Experiment translator = XMLInventoryMTExperiment() experiment = translator.xml_to_mt(inventory) # initiate MTH5 file m = MTH5() m.open_mth5(r"from_iris_dmc.h5", "w")
# Designed to be run with Programs, Data and Plots subfolders under a Seismic folder # Copy this file into the Programs folder # Copy ShakeNetwork2019.csv into the Data folder # Developed by Mark Vanstone using Thonny, a free Python IDE designed for new programmers # from datetime import datetime from obspy import UTCDateTime, Stream from obspy.geodetics import gps2dist_azimuth from obspy.clients import fdsn, iris import matplotlib.pyplot as plt from matplotlib.transforms import blended_transform_factory client = fdsn.Client() irisclient = iris.Client() # EQ details and paramaters for data selection and plotting eqname = "M6.0 Puerto Rico" eqlat = 17.8694 eqlon = -66.8088 eqlatlon = (eqlat, eqlon) eqtime = "2020-01-11 12:54:45" # Plot parameters plots = [ 'normal', 'section', 'distance' ] # choose normal or section, section by distance or angle, also see sortkey below sectiondx = 1e5 # distance between tick marks on x-axis of distance section angledx = 2 # angle between tick marks on x-axis of section by angle sortkey = 0 # 0 = sort by distance from epicentre, 2 = sort by azimuth, which is effective for the normal plot # Vancouver Island excludes - noisy or geographically misplaced recorders exclude = [ 'RB293', 'R93B1', 'R7813', 'R7783', 'R5A78', 'RDCBA', 'R1E5E', 'R923A', 'RE650', 'R37BE', 'RB0B5', 'R3D81', 'R6392', 'RCD29', 'RCE32', 'R6324',
def get_inventory_from_df(self, df, client=None, data=True):
    """
    Get an :class:`obspy.Inventory` object from a
    :class:`pandas.DataFrame`

    :param df: DataFrame with columns

        - 'network'   --> FDSN Network code
        - 'station'   --> FDSN Station code
        - 'location'  --> FDSN Location code
        - 'channel'   --> FDSN Channel code
        - 'start'     --> Start time YYYY-MM-DDThh:mm:ss
        - 'end'       --> End time YYYY-MM-DDThh:mm:ss

    :type df: :class:`pandas.DataFrame`
    :param client: FDSN client name; when given it replaces ``self.client``
    :type client: string
    :param data: True if you want data, False if you want just metadata,
        defaults to True
    :type data: boolean, optional

    :return: tuple ``(inventory, streams)``: the requested metadata and the
        waveform data (the stream stays empty when ``data`` is False)
    :rtype: :class:`obspy.Inventory` and :class:`obspy.Stream`

    .. seealso:: https://docs.obspy.org/packages/obspy.clients.fdsn.html#id1

    .. note:: If any of the column values are blank, then any value will
        be searched for.  For example if you leave 'station' blank, any
        station within the given start and end time will be returned.

    """
    if client is not None:
        self.client = client

    df = self._validate_dataframe(df)

    # get the metadata from an obspy client
    client = fdsn.Client(self.client)

    # create an empty stream to add to
    streams = obsread()
    streams.clear()

    inv = Inventory(networks=[], source="MTH5")

    # sort the values to be logically ordered; sort_values returns a new
    # frame, so the result has to be kept for the sorting to take effect
    df = df.sort_values(self.column_names[:-1])

    # start times already requested, keyed by network / station code
    used_network = dict()
    used_station = dict()
    for row in df.itertuples():
        # First loop builds out networks: one request per new
        # (network, start) combination, all other rows are skipped
        seen_starts = used_network.get(row.network)
        if seen_starts is not None and row.start in seen_starts:
            continue
        net_inv = client.get_stations(
            row.start, row.end, network=row.network, level="network"
        )
        returned_network = net_inv.networks[0]
        used_network.setdefault(row.network, []).append(row.start)

        for st_row in df.itertuples():
            if row.network != st_row.network:
                continue
            # One request per new (station, start) combination; an already
            # seen start is how a repeated epoch is detected
            seen_station_starts = used_station.get(st_row.station)
            if (
                seen_station_starts is not None
                and st_row.start in seen_station_starts
            ):
                continue
            sta_inv = client.get_stations(
                st_row.start,
                st_row.end,
                network=st_row.network,
                station=st_row.station,
                level="station",
            )
            returned_sta = sta_inv.networks[0].stations[0]
            used_station.setdefault(st_row.station, []).append(st_row.start)

            for ch_row in df.itertuples():
                if not (
                    ch_row.network == row.network
                    and st_row.station == ch_row.station
                    and ch_row.start == st_row.start
                ):
                    continue
                cha_inv = client.get_stations(
                    ch_row.start,
                    ch_row.end,
                    network=ch_row.network,
                    station=ch_row.station,
                    loc=ch_row.location,
                    channel=ch_row.channel,
                    level="response",
                )
                returned_chan = cha_inv.networks[0].stations[0].channels[0]
                returned_sta.channels.append(returned_chan)

                # -----------------------------
                # get data if desired
                if data:
                    streams = (
                        client.get_waveforms(
                            ch_row.network,
                            ch_row.station,
                            ch_row.location,
                            ch_row.channel,
                            UTCDateTime(ch_row.start),
                            UTCDateTime(ch_row.end),
                        )
                        + streams
                    )

            returned_network.stations.append(returned_sta)
        inv.networks.append(returned_network)

    return inv, streams
def _ensure_dir(path):
    """Create *path* with its parents, tolerating creation by another rank."""
    try:
        os.makedirs(path)
    except OSError:
        # Either the directory already existed or another MPI rank created
        # it in the meantime; anything else is a real error.
        if not os.path.isdir(path):
            raise


def _fetch_command(tool, verbose_flag, network, station, channel, start, end,
                   region, target_option, quality, minlen=None):
    """Return the shell command line for one FetchData/FetchDataArc call."""
    lat_min, lat_max, lon_min, lon_max = region
    parts = [os.path.join(_ROOT, 'tools_ext', tool), verbose_flag,
             '-N', network, '-S', station, '-C', channel,
             '-s', start, '-e', end]
    if minlen is not None:
        parts += ['-msl', minlen]
    parts += ['--lat', '{}:{}'.format(lat_min, lat_max),
              '--lon', '{}:{}'.format(lon_min, lon_max),
              target_option, '-Q', quality]
    return ' '.join(str(part) for part in parts)


def ant_download():
    """Download waveforms, response files and StationXML for the ids in cfg.

    Every MPI rank downloads its own contiguous share of the ids listed in
    ``cfg.ids`` (one ``network.station.location.channel`` id per line) with
    the external FetchData / FetchDataArc scripts.  The request period
    ``cfg.t_start`` to ``cfg.t_end`` is cut into windows of
    ``cfg.seconds_segment`` seconds (one single window when it is None).
    Files are first written to ``data/raw/rank<N>`` and then moved to
    ``data/raw``.  Rank 0 additionally fetches StationXML with ObsPy and,
    after all ranks reached the barrier, writes
    ``data/raw/download_report.txt``.

    :return: an empty tuple.
    """
    # ==========================================================================
    # preliminaries
    # ==========================================================================
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    outdir = os.path.join('data', 'raw')
    targetloc = os.path.join(outdir, 'rank' + str(rank))
    _ensure_dir(targetloc)

    respfileloc = os.path.join('meta', 'resp')
    _ensure_dir(respfileloc)

    if rank == 0:
        client = fdsn.Client()

    # ==========================================================================
    # - read station list
    # - set parameters
    # ==========================================================================
    # network, channel, location and station list
    with open(cfg.ids, 'r') as fh:
        ids = fh.read().split('\n')

    # Verbose?
    vfetchdata = '-v ' if cfg.verbose else ''

    # Quality?
    quality = cfg.quality

    # time interval of request
    t1 = cfg.t_start
    t2 = cfg.t_end

    # data segment length
    if cfg.seconds_segment is None:
        winlen = UTCDateTime(t2) - UTCDateTime(t1)
    else:
        winlen = int(cfg.seconds_segment)
    # minimum length
    minlen = int(cfg.seconds_minimum)

    # geographical region
    region = (cfg.lat_min, cfg.lat_max, cfg.lon_min, cfg.lon_max)

    # Data centre selection.  NOTE: 'any' has always been served by the
    # IRIS tool only (the arclink branch was unreachable for it); this is
    # kept as is.
    if cfg.data_center == 'iris' or cfg.data_center == 'any':
        fetch_tool = 'FetchData'
    elif cfg.data_center == 'arclink':
        fetch_tool = 'FetchDataArc'
    else:
        fetch_tool = None

    # ==========================================================================
    # - Assign each rank its own chunk to download
    # ==========================================================================
    clen = int(float(len(ids)) / float(size))
    chunk = (rank * clen, (rank + 1) * clen)
    myids = ids[chunk[0]:chunk[1]]
    if rank == size - 1:
        # the last rank also takes the remainder
        myids = ids[chunk[0]:]

    # ==========================================================================
    # Station loop
    # ==========================================================================
    for station_id in myids:
        if station_id == '':
            continue

        id_fields = station_id.split('.')
        network = id_fields[0]
        station = id_fields[1]
        channel = id_fields[3]

        # ======================================================================
        # Time window loop
        # ======================================================================
        # End date of the response request below.  The last window always
        # ends at t2, so this only matters when the loop does not run at
        # all (it used to be undefined in that case).
        tstep = UTCDateTime(t2).strftime('%Y-%m-%d')

        t = UTCDateTime(t1)
        while t < UTCDateTime(t2):
            window_end = min((UTCDateTime(t) + winlen), UTCDateTime(t2))
            tstart = UTCDateTime(t).strftime('%Y-%m-%d')
            tstartstr = UTCDateTime(t).strftime('%Y.%j.%H.%M.%S')
            tstep = window_end.strftime('%Y-%m-%d')
            tstepstr = window_end.strftime('%Y.%j.%H.%M.%S')

            # -Formulate a polite request
            filename = os.path.join(
                targetloc,
                station_id + '.' + tstartstr + '.' + tstepstr + '.mseed')
            if not os.path.exists(filename):
                print('\n Rank ' + str(rank), file=None)
                print('\n Attempting to download data from: ' + station_id,
                      file=None)
                print(filename)
                if fetch_tool is not None:
                    os.system(_fetch_command(
                        fetch_tool, vfetchdata, network, station, channel,
                        tstart, tstep, region, '-o ' + filename, quality,
                        minlen=minlen))
            t += winlen

        # ======================================================================
        # Within Station loop: Download resp files
        # ======================================================================
        tstart = UTCDateTime(t1).strftime('%Y-%m-%d')
        print('\n Downloading response information from: ' + station_id + '\n')
        if fetch_tool is not None:
            os.system(_fetch_command(
                fetch_tool, vfetchdata, network, station, channel,
                tstart, tstep, region, '-rd ' + respfileloc, quality))

    # Clean up (some files come back with 0 data)
    os.system(os.path.join(_ROOT, 'tools', 'cleandir.sh') + ' ' + targetloc)
    cmd = 'mv ' + targetloc + '/* ' + targetloc + '/..'
    print(cmd)
    os.system(cmd)
    os.system('rmdir ' + targetloc)

    # ==========================================================================
    # Separate Station loop: Download stationxml
    # ==========================================================================
    if rank == 0:
        _ensure_dir(os.path.join('meta', 'stationxml'))
        for station_id in ids:
            if station_id == '':
                continue
            id_fields = station_id.split('.')
            network = id_fields[0]
            station = id_fields[1]
            xmlfile = os.path.join('meta', 'stationxml',
                                   '{}.{}.xml'.format(network, station))
            # Metadata request with obspy
            if not os.path.exists(xmlfile):
                client.get_stations(network=network, station=station,
                                    filename=xmlfile, level='response')

    comm.Barrier()

    # ==========================================================================
    # After download completed on all ranks: Check availability
    # ==========================================================================
    if rank == 0:
        separator = '****************************************** \n'
        outfile = os.path.join(outdir, 'download_report.txt')
        with open(outfile, 'w') as outf:
            print('Attempted to download data from stations: \n', file=outf)
            print(separator, file=outf)
            for station_id in ids:
                print(station_id, file=outf)
            print(separator, file=outf)

            # NOTE(review): availability is checked against this fixed list
            # rather than cfg.ids - confirm that this is intended.
            stalist = os.path.join('input', 'downloadlist.txt')
            with open(stalist, 'r') as fh:
                ids = fh.read().split('\n')

            noreturn = []
            for station_id in ids:
                if station_id == '':
                    continue
                fls = sorted(glob(os.path.join(outdir, station_id + '*')))
                if fls:
                    print('Files downloaded for id: ' + station_id, file=outf)
                    print('First file: ' + fls[0], file=outf)
                    print('Last file: ' + fls[-1], file=outf)
                    print(separator, file=outf)
                else:
                    noreturn.append(station_id)

            if noreturn:
                print('NO files downloaded for: \n', file=outf)
                print(noreturn, file=outf)

            print(separator, file=outf)
            print('Download parameters were: \n', file=outf)
            print(separator, file=outf)

        # The report is closed before the configuration is appended to it
        os.system('cat input/config_download.json >> ' + outfile)

    return ()
def main(args): random.seed(datetime.now()) if args.n_distances < 1: args.n_distances = None # print distance classifications if args.n_distances != None: print 'dist_class, dist_deg, dist_km' for dclass in range(0, args.n_distances, 1): dist_deg = util.classification2distance(dclass, args.n_distances) dist_km = geo.degrees2kilometers(dist_deg) print "{} {:.2f} {:.1f}".format(dclass, dist_deg, dist_km) print '' if args.n_magnitudes < 1: args.n_magnitudes = None # print magtitude classifications if args.n_magnitudes != None: print 'mag_class, mag' for mclass in range(0, args.n_magnitudes, 1): mag = util.classification2magnitude(mclass, args.n_magnitudes) print "{} {:.2f}".format(mclass, mag) print '' if args.n_depths < 1: args.n_depths = None # print depth classifications if args.n_depths != None: print 'depth_class, depth' for dclass in range(0, args.n_depths, 1): depth = util.classification2depth(dclass, args.n_depths) print "{} {:.1f}".format(dclass, depth) print '' if args.n_azimuths < 1: args.n_azimuths = None # print azimuth classifications if args.n_azimuths != None: print 'azimuth_class, azimuth' for aclass in range(0, args.n_azimuths, 1): azimuth = util.classification2azimuth(aclass, args.n_azimuths) print "{} {:.1f}".format(aclass, azimuth) print '' if not os.path.exists(args.outpath): os.makedirs(args.outpath) # save arguments with open(os.path.join(args.outpath, 'params.pkl'), 'w') as file: file.write(pickle.dumps(args)) # use `pickle.loads` to do the reverse for dataset in ['train', 'validate', 'test']: for datatype in ['events', 'noise']: datapath = os.path.join(args.outpath, dataset, datatype) if not os.path.exists(datapath): os.makedirs(datapath) mseedpath = os.path.join(datapath, 'mseed') if not os.path.exists(mseedpath): os.makedirs(mseedpath) mseedpath = os.path.join(datapath, 'mseed_raw') if not os.path.exists(mseedpath): os.makedirs(mseedpath) if datatype == 'events': xmlpath = os.path.join(datapath, 'xml') if not os.path.exists(xmlpath): 
os.makedirs(xmlpath) # read catalog of events #filenames = args.event_files_path + os.sep + '*.xml' catalog_dict = {} catalog_all = [] for dirpath, dirnames, filenames in os.walk(args.event_files_path): for name in filenames: if name.endswith(".xml"): file = os.path.join(dirpath, name) catalog = read_events(file) target_count = int(args.event_fraction * float(catalog.count())) print catalog.count(), 'events:', 'read from:', file, 'will use:', target_count, 'since args.event_fraction=', args.event_fraction if (args.event_fraction < 1.0): while catalog.count() > target_count: del catalog[random.randint(0, catalog.count() - 1)] if not args.systematic: tokens = name.split('_') net_sta = tokens[0] + '_' + tokens[1] if not net_sta in catalog_dict: catalog_dict[net_sta] = catalog else: catalog_dict[net_sta] += catalog # sort catalog by date catalog_dict[net_sta] = Catalog(sorted(catalog_dict[net_sta], key=lambda e: e.origins[0].time)) else: catalog_all += catalog # read list of channels to use inventory_full = read_inventory(args.channel_file) inventory_full = inventory_full.select(channel=args.channel_prefix+'Z', sampling_rate=args.sampling_rate) #print(inventory) client = fdsn.Client(args.base_url) # get existing already processed event channel dictionary try: with open(os.path.join(args.outpath, 'event_channel_dict.pkl'), 'r') as file: event_channel_dict = pickle.load(file) except IOError: event_channel_dict = {} print 'Existing event_channel_dict size:', len(event_channel_dict) n_noise = int(0.5 + float(args.n_streams) * args.noise_fraction) n_events = args.n_streams - n_noise n_validate = int(0.5 + float(n_events) * args.validation_fraction) n_test = int(0.5 + float(n_events) * args.test_fraction) n_train = n_events - n_validate - n_test n_count = 0; n_streams = 0 if args.systematic: event_ndx = 0 net_ndx = 0 sta_ndx = 0 channel_ndx = -1 # distance_id_count = {} # max_num_for_distance_id = {} # if args.n_distances != None: # # train # distance_id_count['train'] = [0] 
* args.n_distances # max_num_for_distance_id['train'] = 1 + int(2.0 * float(n_train) / float(args.n_distances)) # print 'Maximum number events for each distance bin train:', max_num_for_distance_id['train'] # # validate # distance_id_count['validate'] = [0] * args.n_distances # max_num_for_distance_id['validate'] = 1 + int(2.0 * float(n_validate) / float(args.n_distances)) # print 'Maximum number events for each distance bin validate:', max_num_for_distance_id['validate'] # # test # distance_id_count['test'] = [0] * args.n_distances # max_num_for_distance_id['test'] = 1 + int(2.0 * float(n_test) / float(args.n_distances)) # print 'Maximum number events for each distance bin test:', max_num_for_distance_id['test'] while args.systematic or n_streams < args.n_streams: try: # choose event or noise is_noise = n_streams >= n_events # reset validate test count if switching from event to noise if n_streams == n_events: n_validate = int(0.5 + float(n_noise) * args.validation_fraction) n_test = int(0.5 + float(n_noise) * args.test_fraction) n_train = n_noise - n_validate - n_test n_count = 0; # set out paths if is_noise: datatype = 'noise' else: datatype = 'events' if n_count < n_train: dataset = 'train' elif n_count < n_train + n_validate: dataset = 'validate' else: dataset = 'test' datapath = os.path.join(args.outpath, dataset, datatype) # get random channel from Inventory #inventory = inventory_full.select(time=origin.time) inventory = inventory_full if args.systematic: try: catalog, event_ndx, event, origin, channel, net_ndx, net, sta_ndx, sta, channel_ndx \ = get_systematic_channel(inventory, catalog_all, is_noise, event_ndx, net_ndx, sta_ndx, channel_ndx) except ValueError: break else: try: catalog, event_ndx, event, origin, channel, net_ndx, net, sta_ndx, sta, channel_ndx = get_random_channel(inventory, catalog_dict, is_noise) except ValueError: continue distance_id = 0 distance = -999.0 magnitude = -999.0 depth = -999.0 azimuth = -999.0 if not is_noise: dist_meters, 
azim, bazim = geo.gps2dist_azimuth(channel.latitude, channel.longitude, origin.latitude, origin.longitude, a=geo.WGS84_A, f=geo.WGS84_F) distance = geo.kilometer2degrees(dist_meters / 1000.0, radius=6371) azimuth = azim magnitude = event.preferred_magnitude().mag depth = origin.depth / 1000.0 if args.n_distances != None: distance_id = util.distance2classification(distance, args.n_distances) # if distance_id_count[dataset][distance_id] >= max_num_for_distance_id[dataset]: # print 'Skipping event_channel: distance bin', distance_id, 'for', dataset, 'already full:', \ # distance_id_count[dataset][distance_id], '/', max_num_for_distance_id[dataset] # continue print '' print 'Event:', origin.time.isoformat(), event.event_descriptions[0].text, \ ', Dist(deg): {:.2f} Dist(km): {:.1f} ID: {}'.format(distance, geo.degrees2kilometers(distance), distance_id), \ ', Mag: {:.2f}'.format(magnitude), \ ', Depth(km): {:.1f}'.format(depth), \ ', Az(deg): {:.1f}'.format(azimuth) print 'Retrieving channels:', (n_streams + 1), '/ ', args.n_streams, (', NOISE, ' if is_noise else ', EVENT, '), 'event', event_ndx, origin.time, \ ', net', net_ndx, ', sta', sta_ndx, ', chan', channel_ndx, \ ', ', net.code, sta.code, \ channel.code, channel.location_code, \ channel.sample_rate # check station was available at origin.time if not sta.is_active(time=origin.time): print 'Skipping event_channel: station not active at origin.time:' continue #key = str(event_ndx) + '_' + str(net_ndx) + '_' + str(sta_ndx) + '_' + str(channel_ndx) + '_' + str(is_noise) key = str(event_ndx) + '_' + net.code + '_' + sta.code + '_' + channel.code + '_' + str(is_noise) if key in event_channel_dict: print 'Skipping event_channel: already processed.' 
continue event_channel_dict[key] = 1 # get start time for waveform request ttime = get_first_P_travel_time(origin, channel) arrival_time = origin.time + ttime if is_noise: # get start time of next event event2 = catalog[event_ndx + 1] origin2 = event2.preferred_origin() # check that origins are at least min time apart if origin2.time - origin.time < MIN_INTER_EVENT_TIME: print 'Skipping noise event_channel: inter event time too small: ', str(origin2.time - origin.time), \ origin2.time, origin.time continue ttime2 = get_first_P_travel_time(origin2, channel) arrival_time2 = origin2.time + ttime2 arrival_time = (arrival_time + ((arrival_time2 - arrival_time) / 2.0)) - args.window_start start_time = arrival_time - args.window_start # request data for 3 channels #for orientation in ['Z', 'N', 'E', '1', '2']: # req_chan = args.channel_prefix + orientation channel_name = net.code + '_' + sta.code + '_' + channel.location_code + '_' + args.channel_prefix padded_start_time = start_time - WINDOW_PADDING_FDSN padded_end_time = start_time + args.window_length + 2.0 * WINDOW_PADDING_FDSN chan_param = args.channel_prefix + '?' 
# kluge to get url used for data request kwargs = {'network': net.code, 'station': sta.code, 'location': channel.location_code, 'channel': chan_param, 'starttime': padded_start_time, 'endtime': padded_end_time} #url = client._create_url_from_parameters('dataselect', DEFAULT_PARAMETERS['dataselect'], **kwargs) url = fdsn.client.build_url(client.base_url, 'dataselect', client.major_versions['dataselect'], "query", parameters=kwargs) print ' java net.alomax.seisgram2k.SeisGram2K', '\"', url, '\"' try: stream = client.get_waveforms( \ net.code, sta.code, channel.location_code, chan_param, \ padded_start_time, padded_end_time, \ attach_response=True) except fdsn.header.FDSNException as ex: print 'Skipping channel:', channel_name, 'FDSNException:', ex, continue print stream # TEST # for trace in stream: # print '==========> trace.stats', trace.stats # check some things if (len(stream) != 3): print 'Skipping channel: len(stream) != 3:', channel_name continue ntrace = 0 for trace in stream: if (len(trace) < 1): print 'Skipping trace: len(trace) < 1:', channel_name continue if (trace.stats.starttime > start_time or trace.stats.endtime < start_time + args.window_length): print 'Skipping trace: does not contain required time window:', channel_name continue ntrace += 1 if (ntrace != 3): print 'Skipping channel: ntrace != 3:', channel_name continue # pre-process streams # sort so that channels will be ingested in NN always in same order ENZ stream.sort(['channel']) # detrend - this is meant to be equivalent to detrend or a long period low-pass (e.g. 
at 100sec) applied to real-time data stream.detrend(type='linear') for trace in stream: # correct for required sampling rate if abs(trace.stats.sampling_rate - args.sampling_rate) / args.sampling_rate > 0.01: trace.resample(args.sampling_rate) # apply high-pass filter if requested if args.hp_filter_freq > 0.0: stream.filter('highpass', freq=args.hp_filter_freq, corners=args.hp_filter_corners) # check signal to noise ratio, if fail, repeat on 1sec hp data to capture local/regional events in longer period microseismic noise sn_type = 'BRB' first_pass = True; while True: if is_noise: snrOK = True else: snrOK = False for trace in stream: # slice with 1sec margin of error for arrival time to: 1) avoid increasing noise amplitude with signal, 2) avoid missing first P in signal if (first_pass): signal_slice = trace.slice(starttime=arrival_time - 1.0, endtime=arrival_time - 1.0 + args.snr_window_length) noise_slice = trace.slice(endtime=arrival_time - 1.0) else: # highpass at 1sec filt_trace = trace.copy() filt_trace.filter('highpass', freq=1.0, corners=4) signal_slice = filt_trace.slice(starttime=arrival_time - 1.0, endtime=arrival_time - 1.0 + args.snr_window_length) noise_slice = filt_trace.slice(endtime=arrival_time - 1.0) sn_type = '1HzHP' # check signal to noise around arrival_time # ratio of std asignal = signal_slice.std() anoise = noise_slice.std() snr = asignal / anoise print trace.id, sn_type, 'snr:', snr, 'std_signal:', asignal, 'std_noise:', anoise # ratio of peak amplitudes (DO NOT USE, GIVE UNSTABLE RESULTS!) 
# asignal = signal_slice.max() # anoise = noise_slice.max() # snr = np.absolute(asignal / anoise) # print trace.id, sn_type, 'snr:', snr, 'amax_signal:', asignal, 'amax_noise:', anoise if is_noise: snrOK = snrOK and snr <= MAX_SNR_NOISE if not snrOK: break else: snrOK = snrOK or snr >= args.snr_accept if (first_pass and not snrOK and args.hp_filter_freq < 0.0): first_pass = False; continue else: break if (not snrOK): if is_noise: print 'Skipping channel:', sn_type, 'snr >', MAX_SNR_NOISE, 'on one or more traces:', channel_name else: print 'Skipping channel:', sn_type, 'snr < args.snr_accept:', args.snr_accept, 'on all traces:', channel_name continue # trim data to required window # try to make sure samples and start/end times align as closely as possible to first trace trace = stream.traces[0] trace = trace.slice(starttime=start_time, endtime=start_time + args.window_length, nearest_sample=True) start_time = trace.stats.starttime stream = stream.slice(starttime=start_time, endtime=start_time + args.window_length, nearest_sample=True) cstart_time = '%04d.%02d.%02d.%02d.%02d.%02d.%03d' % \ (start_time.year, start_time.month, start_time.day, start_time.hour, start_time.minute, \ start_time.second, start_time.microsecond // 1000) # process each trace try: for trace in stream: # correct for overall sensitivity or gain trace.normalize(trace.stats.response.instrument_sensitivity.value) trace.data = trace.data.astype(np.float32) # write miniseed #tracefile = os.path.join(datapath, 'mseed', trace.id + '.' + cstart_time + '.mseed') #trace.write(tracefile, format='MSEED', encoding='FLOAT32') #print 'Channel written:', tracefile, trace.count(), 'samples' except AttributeError as err: print 'Skipping channel:', channel_name, ': Error applying trace.normalize():' , err filename_root = channel_name + '.' 
+ cstart_time # write raw miniseed streamfile = os.path.join(datapath, 'mseed_raw', filename_root + '.mseed') stream.write(streamfile, format='MSEED', encoding='FLOAT32') print 'Stream written:', stream.count(), 'traces:' print ' java net.alomax.seisgram2k.SeisGram2K', streamfile # store absolute maximum stream_max = np.absolute(stream.max()).max() # normalize by absolute maximum stream.normalize(global_max = True) # 20180521 AJL # spherical coordinates # raw data always in same order ENZ # tensor indexing is [traces, datapoints, comps] if args.spherical: rad2deg = 180.0 / math.pi # calculate modulus temp_square = np.add(np.square(stream.traces[0].data), np.add(np.square(stream.traces[1].data), np.square(stream.traces[2].data))) temp_modulus = np.sqrt(temp_square) # calculate azimuth temp_azimuth = np.add( np.multiply(np.arctan2(stream.traces[0].data, stream.traces[1].data), rad2deg), 180.0) # calculate inclination temp_inclination = np.multiply(np.arcsin(np.divide(stream.traces[2].data, temp_modulus)), rad2deg) # reset stream data to spherical coordinates stream.traces[0].data = temp_inclination stream.traces[1].data = temp_azimuth temp_modulus = np.multiply(temp_modulus, 100.0) # increase scale for plotting purposes stream.traces[2].data = temp_modulus # put absolute maximum normalization in first element of data array, to seed NN magnitude estimation # 20180816 AJL - do not mix max with data # for trace in stream: # trace.data[0] = stream_max print 'stream_max', stream_max # write processed miniseed streamfile = os.path.join(datapath, 'mseed', filename_root + '.mseed') stream.write(streamfile, format='MSEED', encoding='FLOAT32') print 'Stream written:', stream.count(), 'traces:' print ' java net.alomax.seisgram2k.SeisGram2K', streamfile # write event waveforms and distance_id in .tfrecords magnitude_id = 0 depth_id = 0 azimuth_id = 0 if not is_noise: # if args.n_distances != None: # distance_id_count[dataset][distance_id] += 1 if args.n_magnitudes != None: 
magnitude_id = util.magntiude2classification(magnitude, args.n_magnitudes) if args.n_depths != None: depth_id = util.depth2classification(depth, args.n_depths) if args.n_azimuths != None: azimuth_id = util.azimuth2classification(azimuth, args.n_azimuths) else: distance_id = -1 distance = 0.0 output_name = filename_root + '.tfrecords' output_path = os.path.join(datapath, output_name) writer = DataWriter(output_path) writer.write(stream, stream_max, distance_id, magnitude_id, depth_id, azimuth_id, distance, magnitude, depth, azimuth) if not is_noise: print '==== Event stream tfrecords written:', output_name, \ 'Dist(deg): {:.2f} Dist(km): {:.1f} ID: {}'.format(distance, geo.degrees2kilometers(distance), distance_id), \ ', Mag: {:.2f} ID: {}'.format(magnitude, magnitude_id), \ ', Depth(km): {:.1f} ID: {}'.format(depth, depth_id), \ ', Az(deg): {:.1f} ID: {}'.format(azimuth, azimuth_id) else: print '==== Noise stream tfrecords written:', output_name, 'ID: Dist {}, Mag {}, Depth {}, Az {}'.format(distance_id, magnitude_id, depth_id, azimuth_id) # write event data if not is_noise: filename = os.path.join(datapath, 'xml', filename_root + '.xml') event.write(filename, 'QUAKEML') n_streams += 1 n_count += 1 except KeyboardInterrupt: print 'Stopping: KeyboardInterrupt' break except Exception as ex: print 'Skipping stream: Exception:', ex traceback.print_exc() continue print n_streams, 'streams:', 'written to:', args.outpath # save event_channel_dict with open(os.path.join(args.outpath, 'event_channel_dict.pkl'), 'w') as file: file.write(pickle.dumps(event_channel_dict))