def download_data(cat, downloaded=True):
    from obspy.clients.fdsn.mass_downloader import CircularDomain, \
        Restrictions, MassDownloader

    if not downloaded:
        # First, define a domain.
        domain = CircularDomain(latitude=cat[0].origins[0].latitude,
                                longitude=cat[0].origins[0].longitude,
                                minradius=0.25, maxradius=5.0)

        # Second, define some additional restrictions.
        restrictions = Restrictions(
            starttime=cat[0].origins[0].time - 0.5 * 60,
            endtime=cat[0].origins[0].time + 5 * 60,
            minimum_interstation_distance_in_m=100E3,
            channel="BHZ",
        )

        # If you leave the providers empty it will loop through all data
        # centers it knows. It will prefer data from the first providers.
        mdl = MassDownloader(providers=["SCEDC", "NCEDC", "IRIS"])

        # Finally launch it.
        mdl.download(domain, restrictions,
                     mseed_storage="waveforms",
                     stationxml_storage="stations")
    else:
        print('Data has been downloaded')
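
# --- Usage sketch for download_data() above (an assumption, not part of the
# --- original snippet): fetch a small catalog via an FDSN client and trigger
# --- the download. Client name, dates, and magnitude threshold are
# --- illustrative.
from obspy import UTCDateTime
from obspy.clients.fdsn import Client

client = Client("IRIS")
cat = client.get_events(starttime=UTCDateTime("2019-07-06"),
                        endtime=UTCDateTime("2019-07-07"),
                        minmagnitude=7.0)
download_data(cat, downloaded=False)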
def _download_cont(nw, starttime, endtime):
    '''
    Download data for each network

    Return: None
    '''
    # Relies on the module-level ``para`` config parser and ``domain``.
    print(f'======Download data for network {nw}: {starttime} - {endtime}======')
    restrictions = Restrictions(
        starttime=starttime,
        endtime=endtime,
        chunklength_in_sec=para["Station Info"].getint("chunksize", 1) * 24 * 60 * 60,
        network=nw,
        station=para["Station Info"].get("station", "*"),
        channel_priorities=para["Station Info"].get("channelpri", "*").split(","),
        reject_channels_with_gaps=False,
        minimum_length=0.0,
        minimum_interstation_distance_in_m=100.0)
    mdl = MassDownloader(providers=["IRIS"])
    mdl.download(
        domain, restrictions,
        mseed_storage=para["DEFAULT"].get("projdir") + "/waveform/{station}/{network}.{station}.{location}.{channel}__{starttime}__{endtime}.mseed",
        stationxml_storage=para["DEFAULT"].get("projdir") + "/station/{network}.{station}.xml")
    return
def create_config(conf='conf.json', tutorial=False, less_data=False):
    """Create JSON config file and download tutorial data if requested"""
    shutil.copyfile(resource_filename('yam', 'conf_example.json'), conf)
    temp_dir = os.path.join(tempfile.gettempdir(), 'yam_example_data')
    template = os.path.join(temp_dir, 'example_data')
    station_template = os.path.join(temp_dir, 'example_inventory')
    try:
        num_files = (len([name for name in os.listdir(template)]),
                     len([name for name in os.listdir(station_template)]))
    except FileNotFoundError:
        num_files = (0, 0)
    if tutorial and (num_files[0] < (9 if less_data else 54) or
                     num_files[1] < 3):
        print('Download example data from Geofon')
        from obspy import UTCDateTime as UTC
        from obspy.clients.fdsn.mass_downloader import (GlobalDomain,
                                                        Restrictions,
                                                        MassDownloader)
        domain = GlobalDomain()
        restrictions = Restrictions(
            starttime=UTC('2010-02-04' if less_data else '2010-02-01'),
            endtime=UTC('2010-02-06' if less_data else '2010-02-15'),
            network='CX', station='PATCX', location=None,
            channel_priorities=["BH[ZN]"], chunklength_in_sec=86400,
            reject_channels_with_gaps=False, minimum_length=0.5)
        mdl = MassDownloader(providers=['GFZ'])
        kw = dict(threads_per_client=1, download_chunk_size_in_mb=200)
        mdl.download(domain, restrictions, template, station_template, **kw)
        restrictions.station = 'PB06'
        if not less_data:
            restrictions.endtime = UTC('2010-02-12')
        mdl.download(domain, restrictions, template, station_template, **kw)
        restrictions.station = 'PB01'
        restrictions.endtime = UTC('2010-02-04 08:00:00')
        restrictions.channel_priorities = ["BHZ"]
        mdl.download(domain, restrictions, template, station_template, **kw)
        if not less_data:
            restrictions.starttime = UTC('2010-02-08 00:00:00')
            restrictions.endtime = UTC('2010-02-09 23:55:00')
            restrictions.channel_priorities = ["BHZ"]
            mdl.download(domain, restrictions, template, station_template,
                         **kw)
    if tutorial:
        dest_dir = os.path.dirname(conf)
        dest_dir_data = os.path.join(dest_dir, 'example_data')
        dest_dir_inv = os.path.join(dest_dir, 'example_inventory')
        if not os.path.exists(dest_dir_data):
            if less_data:
                ignore = shutil.ignore_patterns('*2010020[123]T000000Z__*',
                                                '*2010020[6-9]T000000Z__*',
                                                '*2010021?T000000Z__*')
            else:
                ignore = None
            shutil.copytree(template, dest_dir_data, ignore=ignore)
        if not os.path.exists(dest_dir_inv):
            shutil.copytree(station_template, dest_dir_inv)
def download_data(self, event, providers=None):
    """Download waveform and StationXML data for the given event."""
    event = self.comm.events.get(event)

    from obspy.clients.fdsn.mass_downloader import MassDownloader, \
        Restrictions, GlobalDomain

    proj = self.comm.project

    if isinstance(proj.domain, lasif.domain.GlobalDomain):
        domain = GlobalDomain()
    else:
        domain = self._get_spherical_section_domain(proj.domain)

    event_time = event["origin_time"]
    ds = proj.config["download_settings"]
    starttime = event_time - ds["seconds_before_event"]
    endtime = event_time + ds["seconds_after_event"]

    mseed_storage = os.path.join(proj.paths["data"], event["event_name"],
                                 "raw")

    # Attempt to get StationXML data for a very long time span. This has
    # the nice side effect that StationXML files will mostly be shared
    # between events.
    restrictions = Restrictions(
        starttime=starttime,
        endtime=endtime,
        # Go back 10 years.
        station_starttime=starttime - 86400 * 365.25 * 10,
        # Advance 10 years.
        station_endtime=endtime + 86400 * 365.25 * 10,
        network=None, station=None, location=None, channel=None,
        minimum_interstation_distance_in_m=ds[
            "interstation_distance_in_m"],
        reject_channels_with_gaps=True,
        minimum_length=0.95,
        location_priorities=ds["location_priorities"],
        channel_priorities=ds["channel_priorities"])

    stationxml_storage = self._get_stationxml_storage_fct(starttime,
                                                          endtime)

    # Also log to file for reasons of provenance and debugging.
    logger = logging.getLogger("obspy.clients.fdsn.mass_downloader")
    fh = logging.FileHandler(
        self.comm.project.get_log_file("DOWNLOADS", event["event_name"]))
    fh.setLevel(logging.INFO)
    FORMAT = "[%(asctime)s] - %(name)s - %(levelname)s: %(message)s"
    formatter = logging.Formatter(FORMAT)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    dlh = MassDownloader(providers=providers)
    dlh.download(domain=domain, restrictions=restrictions,
                 mseed_storage=mseed_storage,
                 stationxml_storage=stationxml_storage)
def _download_crandall(self):
    """download waveform/station info for dataset."""
    bank = WaveBank(self.waveform_path)
    domain = CircularDomain(
        self.latitude,
        self.longitude,
        minradius=0,
        maxradius=kilometers2degrees(self.max_dist),
    )
    cat = obspy.read_events(str(self.source_path / "events.xml"))
    df = events_to_df(cat)
    for _, row in df.iterrows():
        starttime = row.time - self.time_before
        endtime = row.time + self.time_after
        restrictions = Restrictions(
            starttime=UTC(starttime),
            endtime=UTC(endtime),
            minimum_length=0.90,
            minimum_interstation_distance_in_m=100,
            channel_priorities=["HH[ZNE]", "BH[ZNE]"],
            location_priorities=["", "00", "01", "--"],
        )
        kwargs = dict(
            domain=domain,
            restrictions=restrictions,
            mseed_storage=str(self.waveform_path),
            stationxml_storage=str(self.station_path),
        )
        MassDownloader(providers=[self._download_client]).download(**kwargs)
        # ensure data have actually been downloaded
        bank.update_index()
        assert not bank.read_index(starttime=starttime,
                                   endtime=endtime).empty
def _download_kemmerer(self):
    """downloads both stations and waveforms"""
    for station in ["M17A", "M18A"]:
        domain = RectangularDomain(
            minlatitude=40.0,
            maxlatitude=43.0,
            minlongitude=-111.0,
            maxlongitude=-110.0,
        )
        restrictions = Restrictions(
            starttime=obspy.UTCDateTime("2009-04-01T00:00:00"),
            endtime=obspy.UTCDateTime("2009-04-04T00:00:00"),
            chunklength_in_sec=3600,
            network="TA",
            channel="BH?",
            station=station,
            reject_channels_with_gaps=False,
            minimum_length=0.0,
            minimum_interstation_distance_in_m=10.0,
        )
        MassDownloader(providers=[self._download_client]).download(
            domain,
            restrictions,
            mseed_storage=str(self.waveform_path),
            stationxml_storage=str(self.station_path),
        )
def descargar_datos(cat, radiomin, radiomax, start_time, end_time, dist_esta):
    nombre_evento = cat["region"].values[0]
    fecha_evento = cat["fecha_evento"].values[0]
    magnitud = cat["Mw_cmt"].values[0]
    nombre_evento = str(nombre_evento + " " + str(fecha_evento) + " " +
                        str(magnitud))  # + str(canal)
    lat_e = cat["lat_cmt"].values[0]
    lon_e = cat["lon_cmt"].values[0]
    time = cat["tiempo_cmt"].values[0]
    time = str(fecha_evento) + "T" + time
    time = UTCDateTime(time)
    depth = cat["depth_cmt"].values[0]
    client = Client("IRIS")
    domain = CircularDomain(latitude=lat_e, longitude=lon_e,
                            minradius=radiomin, maxradius=radiomax)
    mdl = MassDownloader(providers=["IRIS"])
    restrictions = Restrictions(starttime=time - start_time,
                                endtime=time + end_time,
                                chunklength_in_sec=86400,
                                location="00", channel="BHZ",
                                reject_channels_with_gaps=True,
                                minimum_length=0.95,
                                minimum_interstation_distance_in_m=dist_esta,
                                sanitize=True)
    ruta = os.getcwd()
    rutas = "datos/"
    informacion = ruta + "/" + rutas + nombre_evento
    n_carpeta_w = nombre_evento + "/waveforms"
    n_carpeta_s = nombre_evento + "/stations"
    # Convert to a Windows-style path.
    informacion = informacion.replace("/", "\\")
    try:
        os.mkdir(informacion)
        os.chdir(informacion)
        archivo = open("info.txt", "w")
        archivo.write(str(cat["id_evento"].values[0]) + "\n")
        archivo.close()
        os.chdir("..")
        mdl.download(domain, restrictions, mseed_storage=n_carpeta_w,
                     stationxml_storage=n_carpeta_s)
    except Exception:
        # Any failure (e.g. the event folder already exists) aborts
        # this event.
        return -100
def download(eqname, t0, min_length=600):
    domain = GlobalDomain()
    restrictions = Restrictions(
        starttime=t0,
        endtime=t0 + min_length,
        # One-day chunks (the original had 84600, a likely typo for 86400).
        chunklength_in_sec=86400,
        network="*", station="*", location="", channel="BH*",
        reject_channels_with_gaps=True,
        minimum_length=0.0,
        minimum_interstation_distance_in_m=100.0)

    waveform_dir = "data/{}/waveforms".format(eqname)
    stationxml_dir = "data/{}/stations".format(eqname)
    makedir(waveform_dir)
    makedir(stationxml_dir)

    mdl = MassDownloader(providers=["http://eida.koeri.boun.edu.tr:8080"])
    # The Kandilli FDSN service returns an empty station list when
    # matchtimeseries=True, so drop that parameter from the client.
    mdl._initialized_clients[
        "http://eida.koeri.boun.edu.tr:8080"].services["station"].pop(
            "matchtimeseries")
    mdl.download(domain, restrictions, mseed_storage=waveform_dir,
                 stationxml_storage=stationxml_dir)
def get_data(date, latitude, longitude):
    resp_files = '/Users/gsilveira/Documents/MadalenaMatias/dataless/resp/'
    from obspy.clients.fdsn.mass_downloader import RectangularDomain, \
        Restrictions, MassDownloader

    domain = RectangularDomain(
        minlatitude=-55.5,
        maxlatitude=latitude,  # 66.7
        minlongitude=-71.5,
        maxlongitude=longitude)

    restrictions = Restrictions(
        # Get data from the given start date until 2020-10-01.
        starttime=obspy.UTCDateTime(int(date[0]), int(date[1]), int(date[2]),
                                    int(date[3]), int(date[4]), int(date[5]),
                                    int(date[6])),
        endtime=obspy.UTCDateTime(2020, 10, 1),
        # Chunk length per file. NOTE: 3600000 s is roughly 41 days per
        # chunk; the "per_hour" storage path below suggests 3600 s (one
        # hour) may have been intended.
        chunklength_in_sec=3600000,
        # Considering the enormous amount of data associated with continuous
        # requests, you might want to limit the data based on SEED
        # identifiers. If the location code is specified, the location
        # priority list is not used; the same is true for the channel
        # argument and priority list.
        network="GH", station="AKOS", location="*", channel="*",
        # The typical use case for such a data set are noise correlations
        # where gaps are dealt with at a later stage.
        reject_channels_with_gaps=False,
        # Same is true with the minimum length. All data might be useful.
        minimum_length=0.0,
        # Guard against the same station having different names.
        minimum_interstation_distance_in_m=100.0)

    # Restrict the number of providers if you know which serve the desired
    # data. If in doubt just don't specify - then all providers will be
    # queried.
    mdl = MassDownloader(providers=["IRIS"])
    mdl.download(
        domain, restrictions,
        mseed_storage=("../per_hour/{network}/Leslie/{station}/"
                       "{channel}.{location}.{starttime}.{endtime}.mseed"),
        stationxml_storage="stations")
def _download_event(eventtime, starttime, endtime):
    '''
    Download each event for all stations

    Return: None
    '''
    # Relies on the module-level ``para`` config parser and ``domain``.
    restrictions = Restrictions(
        starttime=eventtime - starttime * 60,
        endtime=eventtime + endtime * 60,
        network=para["Station Info"].get("network", "*"),
        station=para["Station Info"].get("station", "*"),
        reject_channels_with_gaps=False,
        minimum_length=0.0,
        channel_priorities=para["Station Info"].get("channelpri", "*").split(","))
    mdl = MassDownloader(providers=["IRIS"])
    mdl.download(
        domain, restrictions,
        mseed_storage=f'{para["DEFAULT"].get("projdir")}/waveform',
        stationxml_storage=f'{para["DEFAULT"].get("projdir")}/station')
    return
def download_events(cat, yeartag="2005", firstid=1):
    """
    Download events to lower 48 with MassDownloader using events in
    obspy Catalog object cat. Parameters for station definitions are
    hard wired.
    """
    # Template path (overridden with a per-event directory below).
    wf_storage = yeartag + "/{starttime}/{network}_{station}_{location}.mseed"
    site_storage = "site_" + yeartag + ".dir"
    mdl = MassDownloader()
    domain = RectangularDomain(minlatitude=20.0, maxlatitude=54.0,
                               minlongitude=-135, maxlongitude=-55)
    count_evid = 1
    for event in cat:
        if count_evid >= firstid:
            t0 = time.time()
            o = event.preferred_origin()
            # Time attribute is already a UTCDateTime object.
            origin_time = o.time
            restrictions = Restrictions(
                starttime=origin_time,
                endtime=origin_time + 3600.0,
                reject_channels_with_gaps=True,
                minimum_length=0.95,
                minimum_interstation_distance_in_m=100.0,
                channel_priorities=["BH[ZNE12]"],
                location_priorities=["", "00", "10", "01"])
            wf_storage = "%s/event%d" % (yeartag, count_evid)
            mdl.download(domain, restrictions,
                         mseed_storage=wf_storage,
                         stationxml_storage=site_storage)
            dt = time.time() - t0
            print("Event ", count_evid, " download time (s)=", dt)
        # Advance the counter for every event so firstid skipping works.
        count_evid += 1
def download_data(starttime, endtime, waveform_dir, station_dir,
                  networks=None, channels=None, providers=None,
                  minimum_length=0.95):
    # Global rectangular domain (the whole Earth).
    domain = RectangularDomain(minlatitude=-90, maxlatitude=90,
                               minlongitude=-180, maxlongitude=180)

    if isinstance(channels, list):
        channel = ",".join(channels)
    elif channels == "None" or channels is None:
        channel = None
    else:
        raise ValueError("Unknown channels: {}".format(channels))

    if isinstance(networks, list):
        network = ",".join(networks)
    elif networks == "None" or networks is None:
        network = None
    else:
        raise ValueError("Unknown networks: {}".format(networks))

    print("network: ", network)
    print("channel: ", channel)

    # Set download restrictions
    restrictions = Restrictions(starttime=starttime,
                                endtime=endtime,
                                reject_channels_with_gaps=False,
                                minimum_length=minimum_length,
                                station=None,
                                network=network,
                                channel=channel,
                                location_priorities=["", "00", "10"],
                                channel_priorities=["BH[ZNE12]",
                                                    "HH[ZNE12]"])

    if (providers is None) or (providers == "None"):
        mdl = MassDownloader()
    else:
        mdl = MassDownloader(providers=providers)

    mdl.download(domain, restrictions, mseed_storage=waveform_dir,
                 stationxml_storage=station_dir)
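
# --- Hypothetical call of download_data() above; the times, networks, and
# --- provider are illustrative assumptions, not part of the original.
from obspy import UTCDateTime

download_data(UTCDateTime("2011-03-11T05:46:24"),
              UTCDateTime("2011-03-11T06:46:24"),
              waveform_dir="waveforms", station_dir="stations",
              networks=["IU", "II"], channels=None, providers=["IRIS"])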
def download_event(event: str):
    # ``d`` is an external workspace helper providing mkdir() and abs().
    d.mkdir(mseed := f'mseed/{event}')
    d.mkdir(xml := f'xml/{event}')

    # Event object
    e = read_events(f'events/{event}')[0]
    time = e.preferred_origin().time

    # call mass downloader
    restrictions = Restrictions(
        starttime=time - 600,
        endtime=time + 7800,
        reject_channels_with_gaps=True,
        minimum_length=0.95,
        channel_priorities=['BH[ZNE12]', 'HH[ZNE12]'],
        location_priorities=['', '00', '10']
    )

    MassDownloader().download(
        GlobalDomain(), restrictions,
        mseed_storage=d.abs(mseed), stationxml_storage=d.abs(xml)
    )
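
# --- The ``d`` helper used above is not shown; a minimal stand-in (purely
# --- an assumption about its interface) could look like this:
import os

class _Workspace:
    """Hypothetical replacement for the ``d`` helper: path bookkeeping."""

    def __init__(self, root='.'):
        self.root = root

    def mkdir(self, path):
        # Create the directory below the workspace root and return the
        # relative path so callers can reuse it.
        os.makedirs(os.path.join(self.root, path), exist_ok=True)
        return path

    def abs(self, path):
        # Absolute path below the workspace root.
        return os.path.abspath(os.path.join(self.root, path))

d = _Workspace()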
def download_global_data(starttime, endtime, waveform_dir, stationxml_dir,
                         stations=None, networks=None, channels=None,
                         location_priorities=None, channel_priorities=None,
                         minimum_length=0.95, reject_channels_with_gaps=True,
                         providers=None):
    domain = GlobalDomain()

    station = list2str(stations)
    network = list2str(networks)
    channel = list2str(channels)

    print("network {}: | station: {} | channel: {} ".format(
        network, station, channel))
    time.sleep(2.0)

    # Set download restrictions
    restrictions = Restrictions(
        starttime=starttime, endtime=endtime,
        reject_channels_with_gaps=reject_channels_with_gaps,
        minimum_length=minimum_length,
        station=station, network=network, channel=channel,
        location_priorities=location_priorities,
        channel_priorities=channel_priorities)

    if (providers is None) or (providers == "None"):
        mdl = MassDownloader()
    else:
        mdl = MassDownloader(providers=providers)

    mdl.download(domain, restrictions, mseed_storage=waveform_dir,
                 stationxml_storage=stationxml_dir)
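
# --- ``list2str`` is referenced above but not defined in this snippet; a
# --- plausible minimal implementation (an assumption, mirroring the
# --- channel/network joining logic of the earlier download_data) would be:
def list2str(items):
    """Join a list into a comma-separated SEED selector; pass None through."""
    if items is None or items == "None":
        return None
    if isinstance(items, list):
        return ",".join(items)
    return items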
    network=network,
    # You might not want to deal with gaps in the data. If this setting is
    # True, any trace with a gap/overlap will be discarded.
    reject_channels_with_gaps=True,
    # And you might only want waveforms that have data for at least 95 % of
    # the requested time span. Any trace that is shorter than 95 % of the
    # desired total duration will be discarded.
    minimum_length=0.95,
    # Filtering stations that are very close together is useful to e.g.
    # remove stations that are part of different networks but at the same
    # physical site. Setting this option to zero or None (as done here)
    # disables that filtering.
    minimum_interstation_distance_in_m=0,
    # Priority list of channel patterns: if a station has channels matching
    # the first pattern, those will be downloaded, otherwise the next
    # pattern is tried, and so on. Nothing will be downloaded if none
    # match. You can add more/fewer patterns if you like.
    channel_priorities=channel_priorities,
    # channel_priorities=["HG[NE]", "HG[NE]"],
    # Location codes are arbitrary and there is no rule as to which
    # location is best. Same logic as for the previous setting.
    location_priorities=["", "00", "10"])

# No specified providers will result in all known ones being queried.
# mdl = MassDownloader()
mdl = MassDownloader(providers=[providers])

# The data will be downloaded to ``folder_output`` with automatically
# chosen file names.
mdl.download(domain, restrictions, mseed_storage=folder_output,
             stationxml_storage=folder_output)
location="", channel="?N?", # The typical use case for such a data set are noise correlations where # gaps are dealt with at a later stage. reject_channels_with_gaps=False, # Same is true with the minimum length. All data might be useful. minimum_length=0.0, # Guard against the same station having different names. #minimum_interstation_distance_in_m=100.0 ) mdl = "tmp" download = False if download: mdl = MassDownloader(providers=["SCEDC"]) mdl.download(domain, restrictions, mseed_storage=data_path + "waveforms/USC/", stationxml_storage=data_path + "stations/") #%% Read in a couple test waveforms plots = False HNx_st = Stream() LNx_st = Stream() # High Broad Band (H??) HNx_st += read(data_path + "waveforms/USC/*HN*.mseed") # Long Period (L??) LNx_st += read(data_path + "waveforms/USC/*LN*.mseed")
    # Will not be used if the channel argument is used.
    channel_priorities=('BH[ZNE12]', 'BL[ZNE12]',
                        'HH[ZNE12]', 'HL[ZNE12]',
                        'SH[ZNE12]', 'SL[ZNE12]',
                        'EH[ZNE12]', 'EL[ZNE12]',
                        'SP[ZNE12]', 'EP[ZNE12]',
                        'DP[ZNE12]'),
    # Priority list for the locations.
    # Will not be used if the location argument is used. (defaults are used)
    location_priorities=('', '00', '10', '01', '20', '02', '30', '03',
                         '40', '04', '50', '05', '60', '06', '70', '07',
                         '80', '08', '90', '09'))

# Step 2: Storage Options
# Use a custom function to store miniSEED waveforms
mseed_storage = get_mseed_storage
# Directly use a folder name to store StationXML files
stationxml_storage = "stations"

# Step 3: Start the Download
# List of FDSN client names or service URLs
# mdl = MassDownloader(providers=["IRIS", "GFZ", "SCEDC"])
mdl = MassDownloader()
tt = mdl.download(domain, restrictions, threads_per_client=3,
                  mseed_storage=mseed_storage,
                  stationxml_storage=stationxml_storage)
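
# --- The snippet above passes a custom ``get_mseed_storage`` callable that
# --- is not shown. A minimal sketch with the signature the mass downloader
# --- expects (return True to skip already-existing data, or a path string
# --- otherwise); the directory layout is an assumption:
import os

def get_mseed_storage(network, station, location, channel, starttime,
                      endtime):
    path = os.path.join("waveforms", "%s.%s" % (network, station),
                        "%s.%s.%s.%s__%s__%s.mseed" % (
                            network, station, location, channel,
                            starttime, endtime))
    # Returning True tells the downloader that the data already exists
    # and should not be requested again.
    if os.path.exists(path):
        return True
    return path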
def downloadwav(
        phase: str, min_epid: float, max_epid: float, model: TauPyModel,
        event_cat: Catalog, tz: float, ta: float, statloc: str,
        rawloc: str, clients: list, evtfile: str, network: str = None,
        station: str = None, saveasdf: bool = False,
        log_fh: logging.FileHandler = None, loglvl: int = logging.WARNING,
        verbose: bool = False, fast_redownload: bool = False):
    """
    Downloads the waveforms for all events in the catalogue
    for a circular domain around the epicentre with defined epicentral
    distances from Clients defined in clients. Also downloads station
    xmls for the corresponding stations.

    Parameters
    ----------
    phase : string
        Arrival phase to be used. P, S, SKS, or ScS.
    min_epid : float
        Minimal epicentral distance to be downloaded.
    max_epid : float
        Maximal epicentral distance to be downloaded.
    model : obspy.taup.TauPyModel
        1D velocity model to calculate arrival.
    event_cat : Obspy event catalog
        Catalog containing all events for which waveforms should be
        downloaded.
    tz : int
        time window before first arrival to download (seconds)
    ta : int
        time window after first arrival to download (seconds)
    statloc : string
        Directory containing the station xmls.
    rawloc : string
        Directory containing the raw seismograms.
    clients : list
        List of FDSN servers. See obspy.Client documentation for acronyms.
    network : string or list, optional
        Network restrictions. Only download from these networks, wildcards
        allowed. The default is None.
    station : string or list, optional
        Only allowed if network != None. Station restrictions. Only download
        from these stations, wildcards are allowed. The default is None.
    saveasdf : bool, optional
        Save the dataset as Adaptable Seismic Data Format (asdf;
        recommended). Else, one will be left with .mseeds.
    log_fh : logging.FileHandler, optional
        file handler to be used for the massdownloader logger.
    loglvl : int, optional
        Use this logging level.
    verbose : bool, optional
        Set True when experiencing issues with the download. Output of
        obspy MassDownloader will be logged in download.log.

    Returns
    -------
    None
    """
    # needed to check whether data is already in the asdf
    global asdfsave
    asdfsave = saveasdf

    # Calculate the min and max theoretical arrival time after event time
    # according to minimum and maximum epicentral distance
    min_time = model.get_travel_times(source_depth_in_km=500,
                                      distance_in_degree=min_epid,
                                      phase_list=[phase])[0].time
    max_time = model.get_travel_times(source_depth_in_km=0.001,
                                      distance_in_degree=max_epid,
                                      phase_list=[phase])[0].time

    mdl = MassDownloader(providers=clients)

    ###########
    # logging for the download
    fdsn_mass_logger = logging.getLogger("obspy.clients.fdsn.mass_downloader")
    fdsn_mass_logger.setLevel(loglvl)

    # Create handler to the log
    if log_fh is None:
        fh = logging.FileHandler(os.path.join('logs', 'download.log'))
        fh.setLevel(logging.INFO)
        fh.setLevel(loglvl)
        # Create Formatter
        fmt = logging.Formatter(
            fmt='%(asctime)s - %(levelname)s - %(message)s')
        fh.setFormatter(fmt)
    else:
        fh = log_fh
    fdsn_mass_logger.addHandler(fh)

    ####
    # Loop over each event
    global event
    for ii, event in enumerate(tqdm(event_cat)):
        # fetch event-data
        origin_time = event.origins[0].time
        ot_fiss = UTCDateTime(origin_time).format_fissures()
        fdsn_mass_logger.info('Downloading event: ' + ot_fiss)
        evtlat = event.origins[0].latitude
        evtlon = event.origins[0].longitude

        # Download location
        ot_loc = UTCDateTime(origin_time, precision=-1).format_fissures()[:-6]
        evtlat_loc = str(roundhalf(evtlat))
        evtlon_loc = str(roundhalf(evtlon))
        tmp.folder = os.path.join(
            rawloc, '%s_%s_%s' % (ot_loc, evtlat_loc, evtlon_loc))

        # create folder for each event
        os.makedirs(tmp.folder, exist_ok=True)

        # Circular domain around the epicenter. This module also offers
        # rectangular and global domains. More complex domains can be
        # defined by inheriting from the Domain class.
        domain = CircularDomain(latitude=evtlat, longitude=evtlon,
                                minradius=min_epid, maxradius=max_epid)

        restrictions = Restrictions(
            # Get data from sufficient time before the earliest arrival
            # and after the latest arrival.
            # Note: All the traces will still have the same length.
            starttime=origin_time + min_time - tz,
            endtime=origin_time + max_time + ta,
            network=network, station=station,
            # You might not want to deal with gaps in the data.
            # If this setting is True, any trace with a gap/overlap will
            # be discarded. That would delete streams with several traces!
            reject_channels_with_gaps=False,
            # And you might only want waveforms that have data for at
            # least 95% of the requested time span. Any trace that is
            # shorter than 95% of the desired total duration will be
            # discarded. For 1.00 it would always delete the waveform.
            minimum_length=0.95,
            # No two stations should be closer than 100 m to each other.
            # This is useful to e.g. filter out stations that are part of
            # different networks but at the same physical station. Setting
            # this option to zero or None will disable that filtering.
            # Guards against the same station having different names.
            minimum_interstation_distance_in_m=100.0,
            # Only HH or BH channels. If a station has BH channels, those
            # will be downloaded, otherwise the HH. Nothing will be
            # downloaded if it has neither.
            channel_priorities=["BH[ZNE12]", "HH[ZNE12]"],
            # Location codes are arbitrary and there is no rule as to
            # which location is best. Same logic as for the previous
            # setting.
            # location_priorities=["", "00", "10"],
            # Discards all mseeds for which no station information is
            # available. Changed to False because otherwise it would
            # redownload over and over and slow down the script.
            sanitize=False)

        # The data will be downloaded to the ``./waveforms/`` and
        # ``./stations/`` folders with automatically chosen file names.
        incomplete = True
        while incomplete:
            try:
                mdl.download(
                    domain, restrictions,
                    mseed_storage=get_mseed_storage,
                    stationxml_storage=statloc,
                    threads_per_client=3, download_chunk_size_in_mb=50)
                incomplete = False
            except IncompleteRead:
                continue  # Just retry for poor connection
            except Exception:
                incomplete = False  # Any other error: continue

        # 2021.02.15 Here, we write everything to asdf
        if saveasdf:
            writeraw(event, tmp.folder, statloc, verbose, True)
            # If that works, we will be deleting the cached mseeds here
            try:
                shutil.rmtree(tmp.folder)
            except FileNotFoundError:
                # This does not make much sense, but for some reason it
                # occurs even if the folder exists? However, we will not
                # want the whole process to stop because of this.
                pass
        if fast_redownload:
            event_cat[ii:].write(evtfile, format="QUAKEML")

    if not saveasdf:
        download_full_inventory(statloc, clients)
    tmp.folder = "finished"  # removes the restriction for preprocess.py
def get_mass_data(network, starttime, endtime, outdir='.', lat=0, lon=0,
                  minrad=0, maxrad=180, providers=['IRIS']):
    """
    Use obspy's massdownloader to download lots of data, stores as day-long \
    miniseed files using IRIS DMC naming conventions.

    :type network: str
    :param network: Network code
    :type starttime: UTCDateTime
    :param starttime: Time to begin downloading from, will use the date
    :type endtime: UTCDateTime
    :param endtime: Time to end download, will use the date
    :type outdir: str
    :param outdir: Path to write to, will write in Y????/R???.01 directories \
        within this head directory.
    :type lat: float
    :param lat: Origin latitude
    :type lon: float
    :param lon: Origin longitude
    :type minrad: float
    :param minrad: Minimum radius in degrees for stations from lat/lon.
    :type maxrad: float
    :param maxrad: Maximum radius in degrees for stations from lat/lon
    :type providers: list
    :param providers: List of providers to query. Default is IRIS. Can pass \
        an empty list and will query all providers, but slow.

    .. note:: Currently selects data using a circular domain, default is \
        set to entire globe so that a whole network can be downloaded. \
        Options left in function to allow repurposing.
    """
    def get_mseed_storage(network, station, location, channel, starttime,
                          endtime):
        """Function to define the naming for the stored file.

        .. note:: Can only have these arguments. As such this needs to be a \
            function defined within the other function to have access to \
            the outdir variable.
        """
        import os
        # Returning True means that neither the data nor the StationXML
        # file will be downloaded.
        # If a string is returned the file will be saved in that location.
        path = os.path.join(
            outdir, "%s/%s.%s.%s.%s.%s" % (
                starttime.strftime('Y%Y/R%j.01'), network, station,
                location, channel, starttime.strftime('%Y.%j')))
        if os.path.exists(path):
            return True
        return path

    import obspy
    from obspy.clients.fdsn.mass_downloader import CircularDomain, \
        Restrictions, MassDownloader

    domain = CircularDomain(latitude=lat, longitude=lon,
                            minradius=minrad, maxradius=maxrad)
    restrictions = Restrictions(
        # Get data for a whole year.
        starttime=starttime,
        endtime=endtime,
        # Chunk it to have one file per day.
        chunklength_in_sec=86400,
        # Considering the enormous amount of data associated with continuous
        # requests, you might want to limit the data based on SEED
        # identifiers. If the location code is specified, the location
        # priority list is not used; the same is true for the channel
        # argument and priority list.
        network=network, station="NA390", location="*", channel="H*",
        # The typical use case for such a data set are noise correlations
        # where gaps are dealt with at a later stage.
        reject_channels_with_gaps=False,
        # Same is true with the minimum length. All data might be useful.
        minimum_length=0.0,
        # Guard against the same station having different names.
        minimum_interstation_distance_in_m=0.0,
        # Do not sanitize downloads, currently a bug
        sanitize=False,
        location_priorities=("", "01", "00", "EP", "S1", "S3", "02", "10",
                             "09", "08", "03", "04", "06", "07", "05",
                             "20", "T0", "2C", "40", "50"))

    # Restrict the number of providers if you know which serve the desired
    # data. If in doubt just don't specify - then all providers will be
    # queried.
    mdl = MassDownloader(providers=providers)
    mseed_storage = get_mseed_storage
    # + "/{station}.{network}.{location}.{channel}.{starttime}")
    mdl.download(domain, restrictions, mseed_storage=mseed_storage,
                 stationxml_storage="stations", threads_per_client=5)
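
# --- Hypothetical invocation of get_mass_data() above; the network code,
# --- date range, and output directory are illustrative assumptions (note
# --- the function hard-wires station "NA390").
from obspy import UTCDateTime

get_mass_data("Z7", UTCDateTime(2015, 1, 1), UTCDateTime(2015, 1, 8),
              outdir="mseed_archive", providers=["IRIS"])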
def retrieveData(self):
    """Retrieve data from many FDSN services, turn into StreamCollection.

    Args:
        event (dict):
            Best dictionary matching input event, fields as above in
            return of getMatchingEvents().

    Returns:
        StreamCollection: StreamCollection object.
    """
    # Bail out if FDSNFetcher not configured
    if 'FDSNFetcher' not in self.config['fetchers']:
        return

    rawdir = self.rawdir
    if self.rawdir is None:
        rawdir = tempfile.mkdtemp()
    else:
        if not os.path.isdir(rawdir):
            os.makedirs(rawdir)

    # use the mass downloader to retrieve data of interest from any FDSN
    # service.
    origin_time = UTCDateTime(self.time)

    # The ObsPy mass downloader has its own logger - grab that stream
    # and write it to our own log file
    ldict = logging.Logger.manager.loggerDict
    if OBSPY_LOGGER in ldict:
        root = logging.getLogger()
        fhandler = root.handlers[0]
        obspy_logger = logging.getLogger(OBSPY_LOGGER)
        obspy_stream_handler = obspy_logger.handlers[0]
        obspy_logger.removeHandler(obspy_stream_handler)
        obspy_logger.addHandler(fhandler)

    # Circular domain around the epicenter.
    domain = CircularDomain(latitude=self.lat, longitude=self.lon,
                            minradius=0, maxradius=self.radius)

    min_dist = self.minimum_interstation_distance_in_m
    restrictions = Restrictions(
        # Define the temporal bounds of the waveform data.
        starttime=origin_time - self.time_before,
        endtime=origin_time + self.time_after,
        network=self.network,
        station='*',
        location='*',
        location_priorities=['*'],
        reject_channels_with_gaps=self.reject_channels_with_gaps,
        # Any trace that is shorter than 95 % of the
        # desired total duration will be discarded.
        minimum_length=self.minimum_length,
        sanitize=self.sanitize,
        minimum_interstation_distance_in_m=min_dist,
        exclude_networks=self.exclude_networks,
        exclude_stations=self.exclude_stations,
        channel_priorities=self.channels)

    # For each of the providers, check if we have a username and password
    # provided in the config. If we do, initialize the client with the
    # username and password. Otherwise, use default initialization.
    client_list = []
    for provider_str in URL_MAPPINGS.keys():
        if provider_str == GEO_NET_ARCHIVE_KEY:
            dt = UTCDateTime.utcnow() - UTCDateTime(self.time)
            if dt < GEONET_ARCHIVE_DAYS:
                provider_str = GEONET_REALTIME_URL
        try:
            fdsn_config = self.config['fetchers']['FDSNFetcher']
            if provider_str in fdsn_config:
                client = Client(
                    provider_str,
                    user=fdsn_config[provider_str]['user'],
                    password=fdsn_config[provider_str]['password'])
            else:
                client = Client(provider_str)
            client_list.append(client)
        # If the FDSN service is down, then an FDSNException is raised
        except FDSNException:
            logging.warning('Unable to initialize client %s' % provider_str)
        except KeyError:
            logging.warning('Unable to initialize client %s' % provider_str)

    if len(client_list):
        # Pass off the initialized clients to the Mass Downloader
        mdl = MassDownloader(providers=client_list)
        logging.info('Downloading new MiniSEED files...')
        # The data will be downloaded to ``rawdir`` with automatically
        # chosen file names.
        mdl.download(domain, restrictions, mseed_storage=rawdir,
                     stationxml_storage=rawdir)

    seed_files = glob.glob(os.path.join(rawdir, '*.mseed'))
    streams = []
    for seed_file in seed_files:
        try:
            tstreams = read_obspy(seed_file, self.config)
        except BaseException as e:
            tstreams = None
            fmt = 'Could not read seed file %s - "%s"'
            logging.info(fmt % (seed_file, str(e)))
        if tstreams is None:
            continue
        else:
            streams += tstreams

    stream_collection = StreamCollection(
        streams=streams, drop_non_free=self.drop_non_free)
    return stream_collection
def download_rf(self, datadir, minDelta=30, maxDelta=150, fskip=1,
                chanrank=['BH', 'HH'], channels='ZNE', phase='P',
                startoffset=-30., endoffset=60.0, verbose=False,
                start_date=None, end_date=None, skipinv=True,
                threads_per_client=3, providers=None,
                blon=0.05, blat=0.05):
    """request receiver function data from FDSN web services
    ====================================================================================================================
    ::: input parameters :::
    min/maxDelta    - minimum/maximum epicentral distance, in degree
    channels        - Channel code, e.g. 'BHZ'. Last character (i.e.
                      component) can be a wildcard ('?' or '*') to fetch
                      Z, N and E component.
    chanrank        - rank of channel types
    phase           - body wave phase to be downloaded, arrival time will
                      be computed using taup
    start/endoffset - start and end offset for downloaded data
    =====================================================================================================================
    """
    if providers is None:
        providers = ['BGR', 'ETH', 'GFZ', 'ICGC', 'INGV', 'IPGP',
                     'IRIS', 'KNMI', 'KOERI', 'LMU', 'NCEDC', 'NIEP',
                     'NOA', 'ODC', 'ORFEUS', 'RASPISHAKE', 'RESIF',
                     'SCEDC', 'TEXNET', 'USP']
    self.get_limits_lonlat()
    minlongitude = self.minlon
    maxlongitude = self.maxlon
    if minlongitude > 180.:
        minlongitude -= 360.
    if maxlongitude > 180.:
        maxlongitude -= 360.
    lon0 = (minlongitude + maxlongitude) / 2.
    lat0 = (self.minlat + self.maxlat) / 2.
    domain = RectangularDomain(minlatitude=self.minlat - blat,
                               maxlatitude=self.maxlat + blat,
                               minlongitude=minlongitude - blon,
                               maxlongitude=maxlongitude + blon)
    try:
        print(self.cat)
    except AttributeError:
        self.copy_catalog()
    try:
        stime4down = obspy.core.utcdatetime.UTCDateTime(start_date)
    except:
        stime4down = obspy.UTCDateTime(0)
    try:
        etime4down = obspy.core.utcdatetime.UTCDateTime(end_date)
    except:
        etime4down = obspy.UTCDateTime()
    mdl = MassDownloader(providers=providers)
    chantype_list = []
    for chantype in chanrank:
        chantype_list.append('%s[%s]' % (chantype, channels))
    channel_priorities = tuple(chantype_list)
    t1 = time.time()
    # loop over events
    ievent = 0
    for event in self.cat:
        event_id = event.resource_id.id.split('=')[-1]
        pmag = event.preferred_magnitude()
        magnitude = pmag.mag
        Mtype = pmag.magnitude_type
        event_descrip = event.event_descriptions[0].text + ', ' + \
            event.event_descriptions[0].type
        porigin = event.preferred_origin()
        otime = porigin.time
        if otime < stime4down or otime > etime4down:
            continue
        ievent += 1
        try:
            print('[%s] [DOWNLOAD BODY WAVE] '
                  % datetime.now().isoformat().split('.')[0] +
                  'Event ' + str(ievent) + ': ' + str(otime) + ' ' +
                  event_descrip + ', ' + Mtype + ' = ' + str(magnitude))
        except:
            print('[%s] [DOWNLOAD BODY WAVE] '
                  % datetime.now().isoformat().split('.')[0] +
                  'Event ' + str(ievent) + ': ' + str(otime) + ' ' +
                  event_descrip + ', M = ' + str(magnitude))
        evlo = porigin.longitude
        evla = porigin.latitude
        try:
            evdp = porigin.depth / 1000.
        except:
            continue
        # check log file existence
        oyear = otime.year
        omonth = otime.month
        oday = otime.day
        ohour = otime.hour
        omin = otime.minute
        osec = otime.second
        label = '%d_%s_%d_%d_%d_%d' % (oyear, mondict[omonth], oday,
                                       ohour, omin, osec)
        eventdir = datadir + '/' + label
        if fskip == 2 and os.path.isdir(eventdir):
            continue
        if not os.path.isdir(eventdir):
            os.makedirs(eventdir)
        event_logfname = eventdir + '/download.log'
        if fskip == 1 and os.path.isfile(event_logfname):
            continue
        stationxml_storage = "%s/{network}/{station}.xml" % eventdir
        # loop over stations
        Nsta = 0
        for network in self.inv:
            for station in network:
                netcode = network.code
                stacode = station.code
                staid = netcode + '.' + stacode
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    st_date = station.start_date
                    ed_date = station.end_date
                if skipinv and (otime < st_date or otime > ed_date):
                    continue
                stlo = station.longitude
                stla = station.latitude
                dist, az, baz = obspy.geodetics.gps2dist_azimuth(
                    evla, evlo, stla, stlo)  # distance is in m
                dist = dist / 1000.
                if baz < 0.:
                    baz += 360.
                Delta = obspy.geodetics.kilometer2degrees(dist)
                if Delta < minDelta:
                    continue
                if Delta > maxDelta:
                    continue
                arrivals = taupmodel.get_travel_times(
                    source_depth_in_km=evdp, distance_in_degree=Delta,
                    phase_list=[phase])  # , receiver_depth_in_km=0)
                try:
                    arr = arrivals[0]
                    arrival_time = arr.time
                    rayparam = arr.ray_param_sec_degree
                except IndexError:
                    continue
                starttime = otime + arrival_time + startoffset
                endtime = otime + arrival_time + endoffset
                restrictions = Restrictions(
                    network=netcode,
                    station=stacode,
                    # starttime and endtime
                    starttime=starttime,
                    endtime=endtime,
                    # You might not want to deal with gaps in the data.
                    reject_channels_with_gaps=True,
                    # And you might only want waveforms that have data for
                    # at least 95 % of the requested time span.
                    minimum_length=0.95,
                    # No two stations should be closer than 10 km to each
                    # other.
                    minimum_interstation_distance_in_m=10E3,
                    channel_priorities=channel_priorities,
                    sanitize=True)
                mseed_storage = eventdir
                try:
                    mdl.download(domain, restrictions,
                                 mseed_storage=mseed_storage,
                                 stationxml_storage=stationxml_storage,
                                 threads_per_client=threads_per_client)
                except Exception:
                    continue
                Nsta += 1
        print('--- [DOWNLOAD BODY WAVE] Event: %s %s'
              % (otime.isoformat(), event_descrip))
        with open(event_logfname, 'w') as fid:
            fid.writelines('evlo: %g, evla: %g\n' % (evlo, evla))
            fid.writelines('DONE\n')
    return
def run_get_waveform(self):
    """
    Get SAC waveforms for an event

    basic usage:
        run_get_waveform(event)

    c              - client
    event          - obspy Event object
    ref_time_place - reference time and place (other than origin time and
                     place - for station subsetting)
    """
    # c = self.client
    event = self.ev
    ref_time_place = self.ref_time_place

    evtime = event.origins[0].time
    reftime = ref_time_place.origins[0].time

    # -----------------------------------------------------------
    # BEGIN OPTIONS MASS DOWNLOADER
    # -----------------------------------------------------------
    # if self.ifmass_downloader is True:
    if self.idb is not None and self.ifmass_downloader is True:
        domain = CircularDomain(
            latitude=self.elat, longitude=self.elon,
            minradius=kilometer2degrees(self.min_dist),
            maxradius=kilometer2degrees(self.max_dist))

        print('DEBUG. domain radius (deg) min/max %f/%f (input, km %f)' %
              (domain.minradius, domain.maxradius, self.max_dist))
        print('DEBUG. lon/lat %f/%f' % (domain.longitude, domain.latitude))

        restrictions = Restrictions(
            starttime=reftime - self.tbefore_sec,
            endtime=reftime + self.tafter_sec,
            # station_starttime=None,
            # station_endtime=None,
            # 2021-06-25 TEST. only query stations available during the
            # event times.
            station_starttime=reftime - self.tbefore_sec,
            station_endtime=reftime + self.tafter_sec,
            chunklength_in_sec=None,
            network=self.network,
            station=self.station,
            location=self.location,
            channel=self.channel,
            # exclude_networks=(),
            # exclude_stations=(),
            # limit_stations_to_inventory=None,
            reject_channels_with_gaps=False,
            minimum_length=0.0,
            sanitize=True,
            # 2021-07-13 avoid using same station with different names
            minimum_interstation_distance_in_m=100,
            # channel_priorities=(),
            # location_priorities=(),
        )

        mdl = MassDownloader()
        outdir = './' + self.evname
        mdl.download(domain=domain, restrictions=restrictions,
                     mseed_storage=outdir + "/mass_downloader/waveforms",
                     stationxml_storage=outdir + "/mass_downloader/stations",
                     download_chunk_size_in_mb=20,
                     threads_per_client=3,
                     print_report=True)

        inventory = get_inventory_from_xml(
            outdir + "/mass_downloader/stations")
        stream_raw = get_streams_from_dir(
            outdir + "/mass_downloader/waveforms")

        print(inventory)

        phases = self.phases
        t1s, t2s = get_phase_arrival_times(inventory, event, self.phases,
                                           self.phase_window,
                                           self.taupmodel, reftime,
                                           self.tbefore_sec,
                                           self.tafter_sec)
    # End mass downloader
    # -----------------------------------------------------------

    # -----------------------------------------------------------
    # Pick client
    # -----------------------------------------------------------
    # Add deprecation warning
    if self.idb is not None:
        print('WARNING: Instead of idb use which client you want to use \n'
              '         By default ev_info.client_name is set to IRIS')
        if self.idb == 3:
            self.client_name = "LLNL"

    if self.client_name != "LLNL" and self.ifmass_downloader is False:
        # Send request to client
        # There might be other way to do this using 'RoutingClient'
        print("Sending request to client: %s" % self.client_name)
        c = self.client
        print(c)

        # Check if stations chosen are correct
        # Example: NCEDC does not understand '-XXX' station code
        if self.client_name == "NCEDC":
            if '-' in self.station:
                raise ValueError(
                    "NCEDC client does not take '-' in station code")

        if self.client_name == "IRIS":
            if '*' in self.network:
                print("WARNING: You have chosen to search ALL networks "
                      "at IRIS. This could take long!")

        # -----------------------------
        if self.ifph5:
            STATION = 'http://service.iris.edu/ph5ws/station/1'
            c = fdsn.client.Client(
                'http://service.iris.edu',
                service_mappings={'station': STATION},
                debug=True)

        # -----------------------------------------------------------
        # Download stations
        # -----------------------------------------------------------
        print("Download stations...")
        stations = c.get_stations(
            network=self.network, location=self.location,
            station=self.station, channel=self.channel,
            starttime=reftime - self.tbefore_sec,
            endtime=reftime + self.tafter_sec,
            minlatitude=self.min_lat, maxlatitude=self.max_lat,
            minlongitude=self.min_lon, maxlongitude=self.max_lon,
            level="response")
        inventory = stations  # so that llnl and iris scripts can be combined
        if self.ifverbose:
            print("Printing stations")
            print(stations)
            print("Done Printing stations...")

        sta_limit_distance(ref_time_place, stations,
                           min_dist=self.min_dist, max_dist=self.max_dist,
                           min_az=self.min_az, max_az=self.max_az,
                           ifverbose=self.ifverbose)
        # print("Printing stations NEW")
        # print(stations)
        # print("Done Printing stations...")
        # stations.plot(projection="local")

        # Find P and S arrival times
        phases = self.phases
        t1s, t2s = get_phase_arrival_times(stations, event, self.phases,
                                           self.phase_window,
                                           self.taupmodel, reftime,
                                           self.tbefore_sec,
                                           self.tafter_sec)

        print("Downloading waveforms...")
        # this needs to change
        bulk_list = make_bulk_list_from_stalist(stations, t1s, t2s,
                                                channel=self.channel)

        if self.ifph5:
            DATASELECT = 'http://service.iris.edu/ph5ws/dataselect/1'
            c = fdsn.client.Client(
                'http://service.iris.edu',
                service_mappings={'dataselect': DATASELECT},
                user=self.user, password=self.password,
                debug=True)
            stream_raw = c.get_waveforms(
                network=self.network, location=self.location,
                station=self.station, channel=self.channel,
                starttime=reftime - self.tbefore_sec,
                endtime=reftime + self.tafter_sec)
        else:
            stream_raw = c.get_waveforms_bulk(bulk_list)

        # save ev_info object
        pickle.dump(self, open(self.evname + '/' + self.evname +
                               '_ev_info.obj', 'wb'))

    elif self.client_name == "LLNL" and self.ifmass_downloader is False:
        # client_name = "LLNL"
        print("Preparing request for LLNL ...")

        # Get event and inventory from the LLNL DB.
        event_number = int(event.event_descriptions[0].text)
        # event = llnl_db_client.get_obspy_event(event)
        inventory = c.get_inventory()

        nsta_llnl = len(inventory.get_contents()["stations"])
        print("--> Total stations in LLNL DB: %i" % nsta_llnl)
        sta_limit_distance(event, inventory,
                           min_dist=self.min_dist, max_dist=self.max_dist,
                           min_az=self.min_az, max_az=self.max_az)
        print("--> Stations after filtering for distance: %i" %
              (len(inventory.get_contents()["stations"])))

        stations = set([sta.code for net in inventory for sta in net])

        _st = c.get_waveforms_for_event(event_number)
        stream_raw = obspy.Stream()
        for tr in _st:
            if tr.stats.station in stations:
                stream_raw.append(tr)

    # set reftime
    # inventory = stations
    stream = obspy.Stream()
    stream = set_reftime(stream_raw, evtime)

    nsta = len(stream)
    if nsta < 1:
        print('STOP. No waveforms to process. N stations = %d\n' % nsta)
        sys.exit()

    print("--> Adding SAC metadata...")
    if self.ifverbose:
        print(stream.__str__(extended=True))
    st2 = add_sac_metadata(stream, client_name=self.client_name, ev=event,
                           inventory=inventory, taup_model=self.taupmodel)
    # 2022-03-10 CHECK IF NEEDED: stalist=inventory,
    # phases=phases, phase_write=self.write_sac_phase
    print('stalist inventory', inventory)

    if len(st2) < 1:
        print('STOP. No waveforms left to process.')
        sys.exit()

    # Do some waveform QA
    do_waveform_QA(st2, self.client_name, event, evtime,
                   self.tbefore_sec, self.tafter_sec)

    if self.demean:
        st2.detrend('demean')
    if self.detrend:
        st2.detrend('linear')
    if self.ifFilter:
        prefilter(st2, self.f1, self.f2, self.zerophase, self.corners,
                  self.filter_type)
    if self.remove_response:
        resp_plot_remove(st2, self.ipre_filt, self.pre_filt,
                         self.iplot_response, self.water_level,
                         self.scale_factor, inventory, self.outformat,
                         self.ifverbose)
    else:
        # output RAW waveforms
        decon = False
        print("WARNING -- NOT correcting for instrument response")

    if self.scale_factor > 0:
        amp_rescale(st2, self.scale_factor)
        if self.client_name == "LLNL":
            amp_rescale_llnl(st2, self.scale_factor)

    # Set the sac header KEVNM with event name
    # This applies to the events from the LLNL database
    # NOTE this command is needed at the time of writing files, so it has
    # to be set early
    st2, evname_key = rename_if_LLNL_event(st2, evtime)
    self.evname = evname_key

    # save station plot
    # Note: Plotted are stations in the inventory and NOT the ones with
    # the traces. It could be possible that there might not be waveforms
    # for some of these stations.
    try:
        fig = inventory.plot(projection="local", resolution="i",
                             label=False, show=False)
        Catalog([self.ev]).plot(fig=fig,
                                outfile=self.evname + '/station_map.pdf')
    except:
        print("There is a problem with creating the station map!")

    # Get list of unique stations + location (example: 'KDAK.00')
    stalist = []
    for tr in stream.traces:
        if self.ifverbose:
            print(tr)
        station_key = "%s.%s.%s.%s" % (tr.stats.network, tr.stats.station,
                                       tr.stats.location,
                                       tr.stats.channel[:-1])
        stalist.append(station_key)

    # Crazy way of getting a unique list of stations
    stalist = list(set(stalist))

    # Resample
    if self.resample_TF is True:
        print('\nRESAMPLING DATA\n')
        print("New sample rate %f Hz" % self.resample_freq)
        # NOTE !!! tell the user if BOTH commands are disabled NOTE !!!
        if self.client_name == "IRIS":
            resample(st2, freq=self.resample_freq)
        elif self.client_name == "LLNL":
            resample_cut(st2, self.resample_freq, evtime,
                         self.tbefore_sec, self.tafter_sec)
    else:
        print("WARNING. Will not resample. Using original rate from the "
              "data: %f Hz" % self.resample_freq)

    # match start and end points for all traces
    st2 = trim_maxstart_minend(stalist, st2, self.client_name, event,
                               evtime, self.resample_TF,
                               self.resample_freq, self.tbefore_sec,
                               self.tafter_sec, self.ifverbose)
    if len(st2) == 0:
        raise ValueError("no waveforms left to process!")

    # save raw waveforms in SAC format
    if self.isave_raw:
        path_to_waveforms = evname_key + "/RAW"
        write_stream_sac_raw(stream_raw, path_to_waveforms, evname_key,
                             self.client_name, event, stations=inventory)

    # Taper waveforms (optional; generally used when data is noisy -
    # example: HutchisonGhosh2016)
    # https://docs.obspy.org/master/packages/autogen/obspy.core.trace.Trace.taper.html
    # To get the same results as the default taper in SAC, use
    # max_percentage=0.05 and leave type as hann.
    # Note: tapering also happens while resampling (see util_write_cap.py)
    if self.taper:
        st2.taper(max_percentage=self.taper, type='hann',
                  max_length=None, side='both')

    # save processed waveforms in SAC format
    # evname_key/RAW_processed = traces after waveform_QA + demean +
    #     detrend + resample + remove response + filtering + resampling +
    #     scaling + tapering
    # NOTE: The orientation is same as that of extracted waveforms.
    # Waveforms are rotated to ENZ, in case they are not already
    # orientated, in the next step (self.rotateRTZ)
    if self.isave_raw_processed:
        path_to_waveforms = os.path.join(evname_key, 'RAW_processed')
        write_stream_sac(st2, path_to_waveforms, evname_key)

    # Rotate to ENZ (save: optional)
    print('\nBEGIN ROTATE COMPONENTS')
    if self.rotateENZ:
        st2 = rotate2ENZ(st2, evname_key, self.isave_ENZ,
                         self.icreateNull, self.ifverbose)

    # rotate to UVW and save
    if self.rotateUVW:
        rotate2UVW(st2, evname_key)

    # Rotate to RTZ and save
    if self.rotateRTZ:
        rotate2RTZ(st2, evname_key, self.ifverbose)

    # save CAP weight files
    if self.output_cap_weight_file:
        write_cap_weights(st2, evname_key, self.client_name, event,
                          self.ifverbose)

    # save event info
    if self.output_event_info:
        write_ev_info(event, evname_key)

    # Plot spectrograms
    if self.ifplot_spectrogram:
        plot_spectrogram(st2, evname_key)

    # save pole zero file (needed for MouseTrap)
    if self.ifsave_sacpaz:
        write_resp(inventory, evname_key)

    # save station inventory as XML file
    if self.ifsave_stationxml:
        xmlfilename = evname_key + "/stations.xml"
        try:
            inventory.write(xmlfilename, format="stationxml",
                            validate=True)
        except:
            print('Could not create stationxml file')

    # Path to the asdf_converter script
    if self.ifsave_asdf:
        # save RTZ
        asdf_filename = evname_key + "/" + evname_key + ".h5"
        os.system("../asdf_converters/asdf_converters/sac2asdf.py " +
                  evname_key + " " + asdf_filename + " observed")
        # save NEZ
        nez_dir = evname_key + "/ENZ/"
        nez_asdf_filename = nez_dir + evname_key + ".h5"
        os.system("../asdf_converters/asdf_converters/sac2asdf.py " +
                  nez_dir + " " + nez_asdf_filename + " observed")

    print('\nDone processing waveform data for event %s' % evname_key)
# ``center``, ``network_code``, and the ObsPy imports are assumed to be
# defined earlier in the script.
domain = GlobalDomain()

# stations_list = ["AXAS1", "AXAS2", "AXBA1", "AXCC1", "AXEC1", "AXEC2",
#                  "AXEC3", "AXID1"]
stations_list = "AX*"

client = Client(center)

### Restrictions
restrictions = Restrictions(
    starttime=obspy.UTCDateTime(2015, 1, 1),
    endtime=obspy.UTCDateTime(2016, 1, 1),
    chunklength_in_sec=86400,
    network=network_code,
    station=stations_list,
    # The typical use case for such a data set are noise correlations where
    # gaps are dealt with at a later stage.
    reject_channels_with_gaps=False,
    # Same is true with the minimum length. All data might be useful.
    minimum_length=0.0,
    channel_priorities=["HH[ZNE]", "EH[ZNE]"],
    # Guard against the same station having different names.
    minimum_interstation_distance_in_m=0.0)

### Download Data
mdl = MassDownloader(providers=[center])
mdl.download(domain, restrictions,
             mseed_storage="/home/baillard/waveforms/",
             stationxml_storage='/home/baillard/')
def download_surf(self, datadir, commontime=True, fskip=True,
                  chanrank=['LH', 'BH', 'HH'], channels='ZNE', vmax=8.0,
                  vmin=.5, verbose=False, start_date=None, end_date=None,
                  skipinv=True, threads_per_client=3, providers=None,
                  blon=0.05, blat=0.05):
    """request Rayleigh wave data from FDSN web services
    ====================================================================================================================
    ::: input parameters :::
    lon0, lat0 - center of array. If specified, all waveforms will have
                 the same starttime and endtime
    chanrank   - rank of channel types
    channels   - Channel code, e.g. 'BHZ'. Last character (i.e. component)
                 can be a wildcard ('?' or '*') to fetch Z, N and E
                 component.
    vmin, vmax - minimum/maximum velocity for surface wave window
    =====================================================================================================================
    """
    if providers is None:
        providers = ['BGR', 'ETH', 'GFZ', 'ICGC', 'INGV', 'IPGP',
                     'IRIS', 'KNMI', 'KOERI', 'LMU', 'NCEDC', 'NIEP',
                     'NOA', 'ODC', 'ORFEUS', 'RASPISHAKE', 'RESIF',
                     'SCEDC', 'TEXNET', 'USP']
    self.get_limits_lonlat()
    minlongitude = self.minlon
    maxlongitude = self.maxlon
    if minlongitude > 180.:
        minlongitude -= 360.
    if maxlongitude > 180.:
        maxlongitude -= 360.
    lon0 = (minlongitude + maxlongitude) / 2.
    lat0 = (self.minlat + self.maxlat) / 2.
    domain = RectangularDomain(minlatitude=self.minlat - blat,
                               maxlatitude=self.maxlat + blat,
                               minlongitude=minlongitude - blon,
                               maxlongitude=maxlongitude + blon)
    try:
        print(self.cat)
    except AttributeError:
        self.copy_catalog()
    try:
        stime4down = obspy.core.utcdatetime.UTCDateTime(start_date)
    except:
        stime4down = obspy.UTCDateTime(0)
    try:
        etime4down = obspy.core.utcdatetime.UTCDateTime(end_date)
    except:
        etime4down = obspy.UTCDateTime()
    mdl = MassDownloader(providers=providers)
    chantype_list = []
    for chantype in chanrank:
        chantype_list.append('%s[%s]' % (chantype, channels))
    channel_priorities = tuple(chantype_list)
    t1 = time.time()
    # loop over events
    for event in self.cat:
        pmag = event.preferred_magnitude()
        # NOTE: the original read ``porigin.depth`` before ``porigin`` was
        # assigned; the preferred origin is now fetched first.
        porigin = event.preferred_origin()
        try:
            magnitude = pmag.mag
            evdp = porigin.depth / 1000.
        except:
            pass
        try:
            Mtype = pmag.magnitude_type
            event_descrip = event.event_descriptions[0].text + ', ' + \
                event.event_descriptions[0].type
            otime = porigin.time
            timestr = otime.isoformat()
            evlo = porigin.longitude
            evla = porigin.latitude
        except:
            pass
        if otime < stime4down or otime > etime4down:
            continue
        if commontime:
            dist, az, baz = obspy.geodetics.gps2dist_azimuth(
                evla, evlo, lat0, lon0)  # distance is in m
            dist = dist / 1000.
            starttime = otime + dist / vmax
            endtime = otime + dist / vmin
        oyear = otime.year
        omonth = otime.month
        oday = otime.day
        ohour = otime.hour
        omin = otime.minute
        osec = otime.second
        label = '%d_%s_%d_%d_%d_%d' % (oyear, mondict[omonth], oday,
                                       ohour, omin, osec)
        eventdir = datadir + '/' + label
        if not os.path.isdir(eventdir):
            os.makedirs(eventdir)
        event_logfname = eventdir + '/download.log'
        if fskip and os.path.isfile(event_logfname):
            continue
        stationxml_storage = "%s/{network}/{station}.xml" % eventdir
        if commontime:
            restrictions = Restrictions(
                # starttime and endtime
                starttime=starttime,
                endtime=endtime,
                # You might not want to deal with gaps in the data.
                reject_channels_with_gaps=True,
                # And you might only want waveforms that have data for at
                # least 95 % of the requested time span.
                minimum_length=0.95,
                # No two stations should be closer than 10 km to each
                # other.
                minimum_interstation_distance_in_m=10E3,
                # Channel priority list built from ``chanrank`` above;
                # earlier entries win, nothing is downloaded if none match.
                channel_priorities=channel_priorities,
                sanitize=True)
            mseed_storage = eventdir
            # mseed_storage = ("%s/{network}/{station}/{channel}.{location}.%s.mseed" % (datadir, label))
            mdl.download(domain, restrictions, mseed_storage=mseed_storage,
                         stationxml_storage=stationxml_storage,
                         threads_per_client=threads_per_client)
        else:
            # loop over stations
            Nsta = 0
            for network in self.inv:
                for station in network:
                    netcode = network.code
                    stacode = station.code
                    staid = netcode + '.' + stacode
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore")
                        st_date = station.start_date
                        ed_date = station.end_date
                    if skipinv and (otime < st_date or otime > ed_date):
                        continue
                    stlo = station.longitude
                    stla = station.latitude
                    dist, az, baz = obspy.geodetics.gps2dist_azimuth(
                        evla, evlo, stla, stlo)  # distance is in m
                    dist = dist / 1000.
                    starttime = otime + dist / vmax
                    endtime = otime + dist / vmin
                    restrictions = Restrictions(
                        network=netcode,
                        station=stacode,
                        # starttime and endtime
                        starttime=starttime,
                        endtime=endtime,
                        # You might not want to deal with gaps in the data.
                        reject_channels_with_gaps=True,
                        # And you might only want waveforms that have data
                        # for at least 95 % of the requested time span.
                        minimum_length=0.95,
                        # No two stations should be closer than 10 km to
                        # each other.
                        minimum_interstation_distance_in_m=10E3,
                        # Channel priority list built from ``chanrank``
                        # above.
                        channel_priorities=channel_priorities,
                        sanitize=True)
                    mseed_storage = eventdir
                    # mseed_storage = ("%s/{network}/{station}/{channel}.{location}.%s.mseed" % (datadir, label))
                    mdl.download(domain, restrictions,
                                 mseed_storage=mseed_storage,
                                 stationxml_storage=stationxml_storage,
                                 threads_per_client=threads_per_client)
                    Nsta += 1
        print('--- [RAYLEIGH DATA DOWNLOAD] Event: %s %s'
              % (otime.isoformat(), event_descrip))
        with open(event_logfname, 'w') as fid:
            fid.writelines('evlo: %g, evla: %g\n' % (evlo, evla))
            if commontime:
                fid.writelines('distance: %g km\n' % dist)
            fid.writelines('DONE\n')
    return
def download(evtime, sec_before, sec_after, lon, lat, minrad, maxrad, provider=["IRIS"], OUT='./'):
    '''
    #example input
    evtime='2000-01-01T06:58:39.780Z'
    sec_before=120
    sec_after=600
    lon=120
    lat=24
    minrad=0
    maxrad=10
    provider=["IRIS"]
    OUT='./TESTDL'
    download(evtime,sec_before,sec_after,lon,lat,minrad,maxrad,provider,OUT)
    '''
    yyyy, mm, dd, HH, MM, SS, NS = cattime2normal(evtime)
    origin_time = obspy.UTCDateTime(yyyy, mm, dd, HH, MM, SS, NS)
    domain = CircularDomain(latitude=lat, longitude=lon,
                            minradius=minrad, maxradius=maxrad)
    #domain = RectangularDomain(minlatitude=34.452, maxlatitude=38.72, minlongitude=-123.201, maxlongitude=-118.015)
    restrictions = Restrictions(
        # Get data from sec_before seconds before the event to sec_after seconds
        # after it. This defines the temporal bounds of the waveform data.
        starttime=origin_time - sec_before,
        endtime=origin_time + sec_after,
        # You might not want to deal with gaps in the data. If this setting is
        # True, any trace with a gap/overlap will be discarded.
        reject_channels_with_gaps=True,
        # And you might only want waveforms that have data for at least 90 % of
        # the requested time span. Any trace that is shorter than 90 % of the
        # desired total duration will be discarded.
        minimum_length=0.9,
        # No two stations should be closer than 100 m to each other. This is
        # useful, for example, to filter out stations that are part of different
        # networks but at the same physical site. Setting this option to
        # zero or None will disable that filtering.
        minimum_interstation_distance_in_m=10E1,
        # Only the first matching channel pattern per station is downloaded;
        # nothing is downloaded if a station has none of them. Add or remove
        # patterns as needed, e.g. ["HH[ZNE]", "BH[ZNE]"], ["BHZ"] or ["HNZ"].
        channel_priorities=["BHZ", "HHZ"],
        # Location codes are arbitrary and there is no rule as to which
        # location is best. Same logic as for the previous setting.
        location_priorities=["", "00", "10", "100"])
    # Pass the requested providers through; leaving providers unspecified
    # would result in all known data centers being queried.
    mdl = MassDownloader(providers=provider)
    # The data will be downloaded to per-event ``waveforms/`` and ``stations/``
    # folders with automatically chosen file names.
    outstr = evtime.split('T')[0].replace('-', '') + str(HH).zfill(2) + str(MM).zfill(2) + str(SS).zfill(2)  # save dir as evid
    outmsdir = OUT + '/' + outstr + "/waveforms"
    outstadir = OUT + '/' + outstr + "/stations"
    mdl.download(domain, restrictions, threads_per_client=20,
                 mseed_storage=outmsdir, stationxml_storage=outstadir)
    return outmsdir, outstadir
def data_request(cmt_filename, param_path):
    # Request config file
    request_param_path = os.path.join(param_path,
                                      "RequestParams/RequestParams.yml")
    # Read the parameter file
    rCparams = smart_read_yaml(request_param_path, mpi_mode=is_mpi_env())
    # Earthquake and Station parameters
    cmt_dir = os.path.dirname(cmt_filename)
    station_dir = os.path.join(cmt_dir, "station_data")
    # Get STATIONS file from CMT directory
    stationsfile = os.path.join(station_dir, "STATIONS")
    # Observed output dir
    obsd_dir = os.path.join(cmt_dir, "seismograms", "obs")
    # CMT parameter input
    cmt = CMTSource.from_CMTSOLUTION_file(cmt_filename)
    duration = rCparams['duration']
    starttime_offset = rCparams['starttime_offset']
    starttime = cmt.origin_time + starttime_offset
    endtime = starttime + duration
    # Get station_list from station_file in database entry
    stations = read_station_file(stationsfile)
    # Create list of networks to download from
    networks = list(set([station[0] for station in stations]))
    network_string = ",".join(networks)
    # Rectangular domain spanning the whole globe, so no station is
    # excluded by location.
    domain = RectangularDomain(minlatitude=-90, maxlatitude=90,
                               minlongitude=-180, maxlongitude=180)
    # Set download restrictions
    restrictions = Restrictions(
        starttime=starttime,
        endtime=endtime,
        reject_channels_with_gaps=False,
        # Trace needs to be almost full length
        minimum_length=float(rCparams['minimum_length']),
        # Only the networks listed in the STATIONS file
        network=network_string,
        channel=",".join(rCparams['channels']),
        location=",".join(rCparams['locations']))
    # No specified providers would result in all known ones being queried.
    providers = ["IRIS"]
    mdl = MassDownloader(providers=providers)
    # The data will be downloaded to the station and observed-seismogram
    # directories chosen above, with automatically chosen file names.
    stationxml_storage = station_dir
    waveform_storage = obsd_dir
    logger.info("MSEEDs: %s" % waveform_storage)
    logger.info("XMLs: %s" % stationxml_storage)
    mdl.download(domain, restrictions, mseed_storage=waveform_storage,
                 stationxml_storage=stationxml_storage)
def GetData(self, stationdirpath='stations', datadirpath='waveforms', req_type='continuous',
            chunklength=86400, tracelen=20000, vmodel='ak135'):
    '''Call the obspy mass downloader to get waveform data. chunklength is the
    trace length (in s) per file for a continuous download; tracelen is the
    trace length (in s) for an event-based request. The defaults download one
    day of data per chunk in continuous mode and 20000 s per event in
    event-based mode.'''

    self.stationdirpath = stationdirpath
    self.datadirpath = datadirpath

    from obspy.clients.fdsn.mass_downloader import RectangularDomain, CircularDomain, \
        Restrictions, MassDownloader

    if req_type == 'continuous':

        # Get data from all stations within this domain
        domain = RectangularDomain(minlatitude=self.minlatitude, maxlatitude=self.maxlatitude,
                                   minlongitude=self.minlongitude, maxlongitude=self.maxlongitude)

        # Download data in daily segments - may want to change
        restrictions = Restrictions(
            starttime=self.starttime, endtime=self.endtime,
            chunklength_in_sec=chunklength,
            channel=self.channel, station=self.station, location="",
            reject_channels_with_gaps=False,
            minimum_length=0.0, minimum_interstation_distance_in_m=100.0)

        # Call the mass downloader to get the waveform information
        mdl = MassDownloader(providers=[self.clientname])
        mdl.download(domain, restrictions, mseed_storage=datadirpath,
                     stationxml_storage=stationdirpath)

    elif req_type == 'event':

        if self.quake_cat is None:
            print("Stop: Must call fetchEvents first to get an event catalog to download from")
            sys.exit(1)

        # Event-based download: event/station pairing
        # Get data for all stations in this domain
        domain = RectangularDomain(minlatitude=self.minlatitude, maxlatitude=self.maxlatitude,
                                   minlongitude=self.minlongitude, maxlongitude=self.maxlongitude)

        for event in self.quake_cat:
            cnt = 0
            print("Downloading data for event %s" % event)

            # For each event, download the waveforms at all stations requested
            origin_time = event.origins[0].time
            vel_model = TauPyModel(model=vmodel)

            # case where we only want to download data for some station-event pairs
            stations_to_exclude = []

            if self.station_autoselect_flag:
                stations_to_download = []
                evlat = event.origins[0].latitude
                evlon = event.origins[0].longitude
                # EK changes added 04/2019
                evdep = event.origins[0].depth

                for network in self.inventory:
                    for station in network:
                        stlat = station.latitude
                        stlon = station.longitude
                        # EK 04/2019
                        # Download data only within the Short Wave Window (SWW),
                        # a cone under the station bounded by an angle; here we
                        # chose 45 degrees.
                        # Calculate the event-station distance and azimuth
                        ddeg = locations2degrees(evlat, evlon, stlat, stlon)
                        distance_m, az, baz = gps2dist_azimuth(evlat, evlon, stlat, stlon)
                        # Proxy for the incidence angle (epicentral distance and
                        # event depth are both in meters here)
                        theta = np.arctan2(distance_m, evdep)

                        if theta <= np.pi / 4:
                            # Check whether the station has the needed arrival
                            arrivals = vel_model.get_travel_times(
                                source_depth_in_km=evdep / 1000.,
                                distance_in_degree=ddeg,
                                phase_list=["s", "S"])
                            if len(arrivals) > 0:
                                # stations we want to download
                                stations_to_download.append(station.code)
                                print(station.code, 'angle = %.2f' % np.rad2deg(theta))
                                print(arrivals)
                                cnt = cnt + 1
                            else:
                                stations_to_exclude.append(station.code)
                        else:
                            if station.code not in stations_to_exclude:
                                stations_to_exclude.append(station.code)

                print("\n-------------\n%g event-station pairs found in SWW\n-------------\n" % cnt)
                print("\n-------------\nSelecting just the following stations for download\n-------------\n")
                print(stations_to_download)

                # Passing the keep-list directly does not work here, so we use
                # the exclude_stations flag below instead:
                # restrictions = Restrictions(starttime=origin_time, endtime=origin_time + tracelen,
                #     reject_channels_with_gaps=False, minimum_length=0.95, minimum_interstation_distance_in_m=10E3,
                #     channel=self.channel, location="", network=self.network, station=stations_to_download)

            # case where we have a single network
            if self.network:
                restrictions = Restrictions(starttime=origin_time, endtime=origin_time + tracelen,
                                            reject_channels_with_gaps=False, minimum_length=0.95,
                                            minimum_interstation_distance_in_m=10E3,
                                            channel=self.channel, location="", network=self.network,
                                            exclude_stations=stations_to_exclude)
            # case where we want all networks within a region (assumes that we also
            # want all stations, unless we have built a list of stations to exclude)
            else:
                restrictions = Restrictions(starttime=origin_time, endtime=origin_time + tracelen,
                                            reject_channels_with_gaps=False, minimum_length=0.95,
                                            minimum_interstation_distance_in_m=10E3,
                                            channel=self.channel,
                                            exclude_stations=stations_to_exclude)

            mdl = MassDownloader(providers=[self.clientname])
            mdl.download(domain, restrictions, mseed_storage=datadirpath,
                         stationxml_storage=stationdirpath)
# NOTE: the opening of this snippet was lost; ``domain``, ``DATADIR``,
# ``starttime``, ``endtime`` and ``network`` are assumed to be defined
# earlier in the script. The call below is clearly a Restrictions(...)
# construction, so its opening line is restored here.
import os

from obspy.clients.fdsn.mass_downloader import MassDownloader, Restrictions

restrictions = Restrictions(
    starttime=starttime,
    endtime=endtime,
    network=network,
    channel="BH?,SH?,LH?",
    chunklength_in_sec=86400,
    reject_channels_with_gaps=False,
    minimum_length=0.0,
    minimum_interstation_distance_in_m=50.0,
    location_priorities=['', '00', '10', '01', '*'],
)

def mseed_storage(network, station, location, channel, starttime, endtime):
    # Storage callback: returning True tells the mass downloader the file
    # already exists and the download can be skipped; returning a string
    # sets the target path for the MiniSEED file.
    path = os.path.join(
        DATADIR, starttime.strftime("%Y%m%d"),
        "{}.{}.{}.{}.mseed".format(network, station, location, channel))
    if os.path.exists(path):
        return True
    else:
        return path

mdl = MassDownloader(providers=["IRIS"])
mdl.download(domain, restrictions, mseed_storage=mseed_storage,
             stationxml_storage="stations", threads_per_client=15)
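# A minimal usage sketch (hypothetical SEED identifiers) of the storage
# callback above, assuming DATADIR is defined: the first call returns a
# target path; once that file exists on disk, the same call returns True
# and the download is skipped.
from obspy import UTCDateTime
print(mseed_storage("IU", "ANMO", "00", "BHZ",
                    UTCDateTime(2010, 2, 1), UTCDateTime(2010, 2, 2)))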
# NOTE: the opening of this Restrictions(...) call (starttime, endtime and the
# other leading arguments) was lost; ``Client``, ``domain``, ``eventid`` and
# ``ts`` are assumed to be defined or imported earlier in the script.
    # Only HH or BH channels. If a station has HH channels, those will be
    # downloaded, otherwise the BH. Nothing will be downloaded if it has
    # neither. You can add more/less patterns if you like.
    channel_priorities=["HH[ZNE]", "BH[ZNE]"],
    # Location codes are arbitrary and there is no rule as to which
    # location is best. Same logic as for the previous setting.
    #location_priorities=["", "00", "10"]
)

# No specified providers will result in all known ones being queried.
# Credentials for the restricted-data client are masked here.
client_gfz = Client("GFZ", user="******", password="******")
#client_eth = Client("ETH", user="******", password="******")
mdl = MassDownloader(providers=[client_gfz, "IRIS"])

# The data will be downloaded to the ``./waveforms/`` and ``./stations/``
# folders with automatically chosen file names.
mdl.download(domain, restrictions,
             mseed_storage=eventid + "/waveforms",
             stationxml_storage=eventid + "/stations")

# Transform the MiniSEED files into ASDF format using pyasdf
import glob
import os

import pyasdf

if ts:
    ds = pyasdf.ASDFDataSet("WITHFOCAL/" + eventid + ".h5", compression="gzip-3")
def downloadMseeds(client_list, stations_json, output_dir, start_time, end_time,
                   min_lat, max_lat, min_lon, max_lon, chunk_size,
                   channel_list=[], n_processor=None):
    """
    Uses the obspy mass downloader to get continuous waveforms from specific
    clients in MiniSEED format in variable chunk sizes. The minimum chunk
    size is 1 day.

    Parameters
    ----------
    client_list: list
        List of client names, e.g. ["IRIS", "SCEDC", "USGS"].

    stations_json: dic,
        Station information.

    output_dir: str
        Output directory.

    min_lat: float
        Min latitude of the region.

    max_lat: float
        Max latitude of the region.

    min_lon: float
        Min longitude of the region.

    max_lon: float
        Max longitude of the region.

    start_time: str
        Start DateTime for the beginning of the period in "YYYY-MM-DDThh:mm:ss.f" format.

    end_time: str
        End DateTime for the end of the period in "YYYY-MM-DDThh:mm:ss.f" format.

    channel_list: str, default=[]
        A list containing the desired channel codes. Downloads will be limited
        to these channels based on priority. Defaults to [] --> all channels.

    chunk_size: int
        Chunk size in days.

    n_processor: int, default=None
        Number of CPU processors for parallel downloading.

    Returns
    ----------
    output_name/station_name/*.mseed: MiniSEED files for each station.

    Warning
    ----------
    Usage of multiprocessing and parallel downloads heavily depends on the
    client, and may cause missing data for some networks. Please test first
    on a short period; if chunks of data are missing for some channels, set
    n_processor=None to avoid parallel downloading.
    """

    with open(stations_json) as json_file:
        station_dic = json.load(json_file)
    print(f"####### There are {len(station_dic)} stations in the list. #######")

    start_t = UTCDateTime(start_time)
    end_t = UTCDateTime(end_time)

    domain = RectangularDomain(minlatitude=min_lat, maxlatitude=max_lat,
                               minlongitude=min_lon, maxlongitude=max_lon)
    mdl = MassDownloader(providers=client_list)

    bg = start_t

    if n_processor is None:
        for st in station_dic:
            print(f'======= Working on {st} station.')
            _get_w(bg, st, station_dic, end_t, mdl, domain, output_dir,
                   chunk_size, channel_list)
    else:
        def process(st):
            print(f'======= Working on {st} station.')
            _get_w(bg, st, station_dic, end_t, mdl, domain, output_dir,
                   chunk_size, channel_list)
        with ThreadPool(n_processor) as p:
            p.map(process, list(station_dic))
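# A hedged usage sketch for downloadMseeds; the station JSON file, region
# bounds, and time window below are hypothetical placeholders, and the JSON
# file is assumed to have been produced beforehand.
downloadMseeds(client_list=["IRIS"],
               stations_json="station_list.json",   # hypothetical file
               output_dir="downloads_mseeds",
               start_time="2019-09-01T00:00:00.0",
               end_time="2019-09-03T00:00:00.0",
               min_lat=35.4, max_lat=36.0,
               min_lon=-118.0, max_lon=-117.2,
               chunk_size=1,
               channel_list=[],
               n_processor=None)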
def retrieveData(self):
    """Retrieve data from many FDSN services, turn into StreamCollection.

    Returns:
        StreamCollection: StreamCollection object.
    """
    rawdir = self.rawdir
    if self.rawdir is None:
        rawdir = tempfile.mkdtemp()
    else:
        if not os.path.isdir(rawdir):
            os.makedirs(rawdir)

    # use the mass downloader to retrieve data of interest from any FDSN
    # service.
    origin_time = UTCDateTime(self.time)

    # The obspy mass downloader has its own logger - grab that stream
    # and write it to our own log file
    ldict = logging.Logger.manager.loggerDict
    if OBSPY_LOGGER in ldict:
        root = logging.getLogger()
        fhandler = root.handlers[0]
        obspy_logger = logging.getLogger(OBSPY_LOGGER)
        obspy_stream_handler = obspy_logger.handlers[0]
        obspy_logger.removeHandler(obspy_stream_handler)
        obspy_logger.addHandler(fhandler)

    # Circular domain around the epicenter.
    domain = CircularDomain(latitude=self.lat, longitude=self.lon,
                            minradius=0, maxradius=self.radius)

    restrictions = Restrictions(
        # Define the temporal bounds of the waveform data.
        starttime=origin_time - self.time_before,
        endtime=origin_time + self.time_after,
        network=self.network,
        station='*',
        location='*',
        location_priorities=['*'],
        reject_channels_with_gaps=self.reject_channels_with_gaps,
        # Any trace shorter than this fraction of the desired total
        # duration will be discarded.
        minimum_length=self.minimum_length,
        sanitize=self.sanitize,
        minimum_interstation_distance_in_m=self.minimum_interstation_distance_in_m,
        exclude_networks=self.exclude_networks,
        exclude_stations=self.exclude_stations,
        channel_priorities=self.channels)

    # DEBUGGING
    pp = pprint.PrettyPrinter()
    pp.pprint(domain.__dict__)
    print('***************************')
    pp.pprint(restrictions.__dict__)
    # DEBUGGING

    # No specified providers will result in all known ones being queried.
    mdl = MassDownloader()

    # we can have a problem of file overlap, so let's remove existing
    # mseed files from the raw directory.
    logging.info('Deleting old MiniSEED files...')
    delete_old_files(rawdir, '*.mseed')

    # remove existing png files as well
    logging.info('Deleting old PNG files...')
    delete_old_files(rawdir, '*.png')

    # remove existing xml files as well
    logging.info('Deleting old XML files...')
    delete_old_files(rawdir, '*.xml')

    logging.info('Downloading new MiniSEED files...')
    # The waveform and StationXML files will be saved to the raw directory
    # with automatically chosen file names.
    mdl.download(domain, restrictions, mseed_storage=rawdir,
                 stationxml_storage=rawdir)

    seed_files = glob.glob(os.path.join(rawdir, '*.mseed'))
    streams = []
    for seed_file in seed_files:
        tstreams = read_fdsn(seed_file)
        streams += tstreams

    stream_collection = StreamCollection(streams=streams,
                                         drop_non_free=self.drop_non_free)
    return stream_collection
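# A self-contained sketch of the logger-handler swap performed in
# retrieveData above. The log file name is hypothetical, and the logger
# name is assumed to be what the OBSPY_LOGGER constant holds (the mass
# downloader's module logger): detach the library's stream handler and
# attach our file handler so its messages land in our log file.
import logging

root = logging.getLogger()
root.addHandler(logging.FileHandler("retriever.log"))  # hypothetical log file
mdl_logger = logging.getLogger("obspy.clients.fdsn.mass_downloader")
for handler in list(mdl_logger.handlers):
    mdl_logger.removeHandler(handler)
mdl_logger.addHandler(root.handlers[0])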