def read_usc(filename, **kwargs):
    """Read USC V1 strong motion file.

    Args:
        filename (str): Path to possible USC V1 data file.
        kwargs (ref):
            Ignored by this function.

    Returns:
        Stream: Obspy Stream containing three channels of acceleration data
        (cm/s**2).

    Raises:
        Exception: If the file is not a valid USC file.
        GMProcessException: If the file is not a supported USC volume.
    """
    logging.debug("Starting read_usc.")
    valid, alternate = is_usc(filename, return_alternate=True)
    if not valid:
        raise Exception('%s is not a valid USC file' % filename)

    # Check for Location
    location = kwargs.get('location', '')

    # BUG FIX: the original wrapped open()/readline() in a bare
    # ``try/except: pass``, which swallowed all errors and left
    # ``first_line`` unbound if the read failed. A context manager
    # guarantees the handle is closed and lets real I/O errors propagate.
    with open(filename, 'rt') as f:
        first_line = f.readline()

    if first_line.find('OF UNCORRECTED ACCELEROGRAM DATA OF') >= 0:
        stream = read_volume_one(
            filename, location=location, alternate=alternate)
    else:
        raise GMProcessException('USC: Not a supported volume.')

    return stream
def read_data(filename, read_format=None, **kwargs):
    """
    Read strong motion data from a file.

    Args:
        filename (str): Path to file
        read_format (str): Format of file

    Returns:
        list: Sequence of obspy.core.stream.Streams read from file
    """
    # Nothing to do if the path does not exist.
    if not os.path.exists(filename):
        raise GMProcessException('Not a file %r' % filename)

    # Resolve the format: auto-detect when not supplied, otherwise
    # validate the caller's choice against the file contents.
    if read_format is None:
        fmt = _get_format(filename)
    else:
        fmt = _validate_format(filename, read_format.lower())

    # Each supported format lives in gmprocess.io.<fmt>.core and exposes
    # a read_<fmt>() entry point; look it up dynamically and invoke it.
    core_module = importlib.import_module('gmprocess.io.%s.core' % fmt)
    return getattr(core_module, 'read_' + fmt)(filename, **kwargs)
def get_metadata(
        eqlat=None, eqlon=None, eqtime=None, eqradius=10,
        abandoned=False, station_type='Ground',
        eqtimewindow=10,  # seconds
        station_radius=200):
    """Retrieve station metadata JSON from CESMD web service.

    Args:
        eqlat (float): Earthquake latitude.
        eqlon (float): Earthquake longitude.
        eqtime (datetime): Earthquake origin time.
        eqradius (float): Earthquake search radius (km).
        abandoned (bool):
            Whether or not to include abandoned stations in the search.
        station_type (str):
            One of the keys of the STATION_TYPES dictionary.
        eqtimewindow (float): Earthquake time search window in sec.
        station_radius (str): Radius (km) to search for stations from
            epicenter.

    Returns:
        dict: Dictionary of event/station information.

    Raises:
        GMProcessException: If eqlat/eqlon/eqtime are not all provided.
        Exception: If the web service returns a non-200 status code.
    """
    # BUG FIX: the original "docstring" was immediately %-formatted with
    # STATION_TYPES, turning it into a discarded expression statement —
    # the function had no __doc__ and the join() ran on every call.
    # (Also fixed the 'eqtimewidow' typo in the parameter docs.)
    params = {
        'rettype': 'metadata',
        'groupby': 'event',
        'format': 'json',
        'nodata': 404,
        'sttype': STATION_TYPES[station_type],
        'abandoned': abandoned
    }
    has_event_info = ((eqlat is not None) and (eqlon is not None)
                      and (eqtime is not None))
    if not has_event_info:
        raise GMProcessException(
            'get_metadata must get either event id or event information.')
    else:
        # Center the search window on the origin time. Use true division
        # so odd window lengths are not silently truncated (the original
        # used // which floored to whole seconds).
        starttime = eqtime - timedelta(seconds=eqtimewindow / 2)
        endtime = eqtime + timedelta(seconds=eqtimewindow / 2)
        params['elat'] = eqlat
        params['elon'] = eqlon
        params['erad'] = eqradius
        params['startdate'] = starttime.strftime('%Y-%m-%dT%H:%M:%S')
        params['enddate'] = endtime.strftime('%Y-%m-%dT%H:%M:%S')

    params['maxepidist'] = station_radius
    session = Session()
    request = Request('GET', URL_TEMPLATE, params=params).prepare()
    response = session.get(request.url)
    if response.status_code != 200:
        fmt = 'Could not retrieve data from url "%s": Server response %i'
        raise Exception(fmt % (request.url, response.status_code))
    metadata = response.json()
    return metadata
def pick_ar(stream, picker_config=None, config=None):
    """Wrapper around the AR P-phase picker.

    Args:
        stream (StationStream):
            Stream containing waveforms that need to be picked.
        picker_config (dict):
            Dictionary with parameters for AR P-phase picker.
            See picker.yml.
        config (dict):
            Configuration dictionary. Key value here is:
                windows:
                    window_checks:
                        min_noise_duration

    Returns:
        tuple:
            - Best estimate for p-wave arrival time (s since start of trace).
            - Mean signal to noise ratio based on the pick.
    """
    if picker_config is None:
        picker_config = get_config(section='pickers')
    if config is None:
        config = get_config()
    min_noise_dur = config['windows']['window_checks']['min_noise_duration']
    params = picker_config['ar']

    # Pull out one trace per component: east, north, and vertical.
    east = stream.select(channel='??[E1]')
    north = stream.select(channel='??[N2]')
    vertical = stream.select(channel='??[Z3]')

    # If we cannot find exactly one of each, defer to the next picker
    # in the order of preference.
    if any(len(comp) != 1 for comp in (east, north, vertical)):
        raise GMProcessException('Unable to perform AR picker.')

    minloc = ar_pick(vertical[0].data,
                     north[0].data,
                     east[0].data,
                     vertical[0].stats.sampling_rate,
                     **params)[0]

    if minloc < min_noise_dur:
        fmt = 'Noise window (%.1f s) less than minimum (%.1f)'
        raise GMProcessException(fmt % (minloc, min_noise_dur))

    mean_snr = calc_snr(stream, minloc)
    return (minloc, mean_snr)
def _get_format(filename):
    """
    Get the format of the file.

    Args:
        filename (str): Path to file

    Returns:
        string: Format of file.
    """
    # Every sub-package of gmprocess.io (other than the excluded ones)
    # is a candidate format.
    io_directory = pkg_resources.resource_filename('gmprocess', 'io')
    valid_formats = [module for module in os.listdir(io_directory)
                     if module.find('.') < 0 and module not in EXCLUDED]

    # A format claims the file when its is_<format>() checker passes.
    formats = []
    for candidate in valid_formats:
        core = importlib.import_module(
            'gmprocess.io.' + candidate + '.core')
        checker = getattr(core, 'is_' + candidate)
        if checker(filename):
            formats.append(candidate)

    # Decide among the claimed formats.
    formats = np.asarray(formats)
    if len(formats) == 1:
        return formats[0]
    elif len(formats) == 2 and 'gmobspy' in formats:
        # gmobspy is a catch-all reader; prefer the more specific match.
        return formats[formats != 'gmobspy'][0]
    elif len(formats) == 0:
        raise GMProcessException('No format found for file %r.' % filename)
    else:
        raise GMProcessException(
            'Multiple formats passing: %r. Please retry file %r '
            'with a specified format.' % (formats.tolist(), filename))
def is_usc(filename, **kwargs):
    """Check to see if file is a USC strong motion file.

    Args:
        filename (str): Path to possible USC V1 data file.
        kwargs (ref):
            return_alternate (bool): If True, also return whether the
                alternate header offsets were used.

    Returns:
        bool: True if USC, False otherwise. When ``return_alternate`` is
        True, a ``(valid, alternate)`` tuple is returned instead.
    """
    logging.debug("Checking if format is usc.")
    # USC requires unique integer values
    # in column 73-74 on all text header lines
    # excluding the first file line
    return_alternate = kwargs.get('return_alternate', False)

    # BUG FIX: the original opened the file without a context manager and
    # closed it in a ``finally`` clause; if open() itself failed, ``f`` was
    # unbound and the finally raised NameError instead of returning False.
    # It also closed the handle twice on the early-return path, and bound
    # an unused ``volume`` local.
    try:
        with open(filename, 'rt') as f:
            first_line = f.readline()
        if first_line.find('OF UNCORRECTED ACCELEROGRAM DATA OF') >= 0:
            start = 1
            stop = 12
        elif first_line.find('CORRECTED ACCELEROGRAM') >= 0:
            start = 2
            stop = 12
        elif first_line.find('RESPONSE') >= 0:
            # NOTE: as in the original, this exception is immediately
            # swallowed by the except below, so response-spectra files
            # simply test False here.
            raise GMProcessException(
                'USC: Derived response spectra and fourier '
                'amplitude spectra not supported: %s' % filename)
        else:
            return False
        alternate_start = start + 2
        alternate_stop = stop - 2
    except Exception:
        return False

    valid = _check_header(start, stop, filename)
    alternate = False
    if not valid:
        valid = _check_header(alternate_start, alternate_stop, filename)
        if valid:
            alternate = True

    if return_alternate:
        return valid, alternate
    else:
        return valid
def is_smc(filename):
    """Check to see if file is a SMC (corrected, in acc.) strong motion file.

    Args:
        filename (str): Path to possible SMC corrected data file.

    Returns:
        bool: True if SMC, False otherwise.
    """
    logging.debug("Checking if format is smc.")
    try:
        with open(filename, 'rt') as f:
            lines = f.readlines()

        firstline = lines[0].strip()
        if firstline in VALID_HEADERS:
            return True

        # Displacement and velocity volumes are recognized as SMC here;
        # read_smc() is responsible for rejecting them with a meaningful
        # error. BUG FIX: the original raised GMProcessException AFTER a
        # ``return True``, leaving those raise statements as unreachable
        # dead code — they have been removed.
        if 'DISPLACEMENT' in firstline or 'VELOCITY' in firstline:
            return True

        if '*' in firstline:
            # Unlabeled volume: verify the fixed-layout comment section
            # (rows 27..27+n must all be '|'-prefixed comment lines).
            end_ascii = lines[10]
            if '*' not in end_ascii:
                return False
            comment_row = int(lines[12].strip().split()[-1])
            for r in range(27, 27 + comment_row):
                if not lines[r].startswith('|'):
                    return False
            return True

        return False
    except UnicodeDecodeError:
        # Binary content cannot be an SMC text file.
        return False
def validate(self):
    """Some validation checks across streams.
    """
    # If tag exists, it should be consistent across StationStreams
    observed_labels = set()
    for stream in self:
        if hasattr(stream, 'tag'):
            _, tag_label = stream.tag.split('_')
            observed_labels.add(tag_label)
        else:
            observed_labels.add("")
    if len(observed_labels) > 1:
        raise GMProcessException(
            'Only one label allowed within a StreamCollection.')
def pick_baer(stream, picker_config=None, config=None):
    """Wrapper around the Baer P-phase picker.

    Args:
        stream (StationStream):
            Stream containing waveforms that need to be picked.
        picker_config (dict):
            Dictionary with parameters for Baer P-phase picker.
            See picker.yml.
        config (dict):
            Configuration dictionary. Key value here is:
                windows:
                    window_checks:
                        min_noise_duration

    Returns:
        tuple:
            - Best estimate for p-wave arrival time (s since start of trace).
            - Mean signal to noise ratio based on the pick.
    """
    if picker_config is None:
        picker_config = get_config(section='pickers')
    if config is None:
        config = get_config()
    min_noise_dur = config['windows']['window_checks']['min_noise_duration']
    params = picker_config['baer']

    # One pick per trace, converted from sample index to seconds.
    picks = np.array([
        pk_baer(tr.data, tr.stats.sampling_rate, **params)[0] * tr.stats.delta
        for tr in stream
    ])

    # The earliest non-negative pick wins; -1 flags "no valid pick".
    valid_picks = picks[picks >= 0]
    minloc = np.min(valid_picks) if valid_picks.size else -1

    if minloc < min_noise_dur:
        fmt = 'Noise window (%.1f s) less than minimum (%.1f)'
        raise GMProcessException(fmt % (minloc, min_noise_dur))

    mean_snr = calc_snr(stream, minloc)
    return (minloc, mean_snr)
def pick_yeck(stream):
    """IN DEVELOPMENT! SNR based P-phase picker.

    Args:
        stream (StationStream):
            Stream containing waveforms that need to be picked.

    Returns:
        tuple:
            - Best estimate for p-wave arrival time (s since start of trace).
            - Mean signal to noise ratio based on the pick.
    """
    min_window = 5.0  # put into config
    config = get_config()
    min_noise_dur = config['windows']['window_checks']['min_noise_duration']

    locs = []
    for trace in stream:
        samples = trace.data
        rate = trace.stats.sampling_rate
        edge = int(min_window * rate)

        # Compute the SNR at every candidate sample, leaving min_window
        # seconds untouched at both ends of the trace.
        snr = np.zeros(len(samples))
        for idx in range(edge, len(samples) - edge):
            snr[idx] = sub_calc_snr(samples, idx)

        # The sample with the highest SNR is this trace's pick.
        locs.append(snr.argmax() / rate)

    locs = np.array(locs)
    candidates = locs[locs >= 0]
    minloc = np.min(candidates) if candidates.size else -1

    if minloc < min_noise_dur:
        fmt = 'Noise window (%.1f s) less than minimum (%.1f)'
        raise GMProcessException(fmt % (minloc, min_noise_dur))

    mean_snr = calc_snr(stream, minloc)
    return (minloc, mean_snr)
def _get_channel(angle, sampling_rate):
    # Map a sensor azimuth to a channel name. 500 and 600 are the special
    # codes for vertical sensors; any other value must be a compass
    # bearing in [0, 360].
    is_vertical = angle == 500 or angle == 600
    if not is_vertical and not (0 <= angle <= 360):
        errstr = ('Not enough information to distinguish horizontal from '
                  'vertical channels.')
        raise GMProcessException('DMG: ' + errstr)

    if is_vertical:
        return get_channel_name(sampling_rate,
                                is_acceleration=True,
                                is_vertical=True,
                                is_north=False)

    # Bearings within 45 degrees of due north or due south count as
    # "north" channels; everything else is "east".
    if angle >= 315 or angle < 45 or (135 <= angle < 225):
        return get_channel_name(sampling_rate,
                                is_acceleration=True,
                                is_vertical=False,
                                is_north=True)

    return get_channel_name(sampling_rate,
                            is_acceleration=True,
                            is_vertical=False,
                            is_north=False)
def _read_volume_two(filename, line_offset, location='', units='acc'):
    """Read channel data from DMG text file.

    Args:
        filename (str): Input DMG V2 filename.
        line_offset (int): Line offset to beginning of channel text block.
        location (str): Optional location code to pass into the header.
        units (str): units to get

    Returns:
        tuple: (list of obspy Trace, int line offset)
    """
    # Skip to this channel's block and pull its fixed-size text header.
    try:
        with open(filename, 'rt') as f:
            for _ in range(line_offset):
                next(f)
            lines = [next(f) for x in range(V2_TEXT_HDR_ROWS)]
    # Accounts for blank lines at end of files
    except StopIteration:
        return (None, 1 + line_offset)

    # read in lines of integer data
    skip_rows = V2_TEXT_HDR_ROWS + line_offset
    int_data = _read_lines(skip_rows, V2_INT_HDR_ROWS, V2_INT_FMT, filename)
    int_data = int_data[0:100].astype(np.int32)

    # read in lines of float data
    skip_rows += V2_INT_HDR_ROWS
    flt_data = _read_lines(skip_rows, V2_REAL_HDR_ROWS, V2_REAL_FMT,
                           filename)
    flt_data = flt_data[:100]
    skip_rows += V2_REAL_HDR_ROWS

    # according to the powers that defined the Network.Station.Channel.Location
    # "standard", Location is a two character field. Most data providers,
    # including csmip/dmg here, don't always provide this. We'll flag it as
    # "--".
    hdr = _get_header_info(int_data, flt_data, lines, 'V2',
                           location=location)
    head, tail = os.path.split(filename)
    hdr['standard']['source_file'] = tail or os.path.basename(head)

    traces = []
    # read acceleration data
    if hdr['npts'] > 0:
        acc_rows, acc_fmt, unit = _get_data_format(filename, skip_rows,
                                                   hdr['npts'])
        acc_data = _read_lines(skip_rows + 1, acc_rows, acc_fmt, filename)
        acc_data = acc_data[:hdr['npts']]
        # Convert to cm/s/s when the recorded unit is one we know.
        if unit in UNIT_CONVERSIONS:
            acc_data *= UNIT_CONVERSIONS[unit]
            logging.debug('Data converted from %s to cm/s/s' % (unit))
        else:
            raise GMProcessException('DMG: %s is not a supported unit.'
                                     % unit)
        acc_trace = StationTrace(acc_data.copy(), Stats(hdr.copy()))

        response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
        acc_trace.setProvenance('remove_response', response)

        if units == 'acc':
            traces += [acc_trace]
        skip_rows += int(acc_rows) + 1

    # -------------------------------------------------------------------------
    # NOTE: The way we were initially reading velocity and displacement data
    # was not correct. I'm deleting it for now since we don't need it.
    # If/when we revisit this we need to be more careful about how this is
    # handled.
    # -------------------------------------------------------------------------
    # read velocity data
    # NOTE(review): vel_data/disp_data below are read only to advance
    # skip_rows past those sections; the arrays themselves are discarded.
    vel_hdr = hdr.copy()
    vel_hdr['standard']['units'] = 'vel'
    vel_hdr['npts'] = int_data[63]
    if vel_hdr['npts'] > 0:
        vel_rows, vel_fmt, unit = _get_data_format(filename, skip_rows,
                                                   vel_hdr['npts'])
        vel_data = _read_lines(skip_rows + 1, vel_rows, vel_fmt, filename)
        vel_data = vel_data[:vel_hdr['npts']]
        skip_rows += int(vel_rows) + 1

    # read displacement data
    disp_hdr = hdr.copy()
    disp_hdr['standard']['units'] = 'disp'
    disp_hdr['npts'] = int_data[65]
    if disp_hdr['npts'] > 0:
        disp_rows, disp_fmt, unit = _get_data_format(filename, skip_rows,
                                                     disp_hdr['npts'])
        disp_data = _read_lines(skip_rows + 1, disp_rows, disp_fmt,
                                filename)
        disp_data = disp_data[:disp_hdr['npts']]
        skip_rows += int(disp_rows) + 1

    # there is an 'end of record' line after the data
    new_offset = skip_rows + 1
    return (traces, new_offset)
def _read_volume_one(filename, line_offset, location='', units='acc'):
    """Read channel data from DMG Volume 1 text file.

    Args:
        filename (str): Input DMG V1 filename.
        line_offset (int): Line offset to beginning of channel text block.
        location (str): Optional location code to pass into the header.
        units (str): units to get

    Returns:
        tuple: (list of obspy Trace, int line offset)
    """
    # Parse the header portion of the file
    try:
        with open(filename, 'rt') as f:
            for _ in range(line_offset):
                next(f)
            lines = [next(f) for x in range(V1_TEXT_HDR_ROWS)]
    # Accounts for blank lines at end of files
    except StopIteration:
        return (None, 1 + line_offset)

    unit = _get_units(lines[11])
    # read in lines of integer data
    skip_rows = V1_TEXT_HDR_ROWS + line_offset
    int_data = _read_lines(skip_rows, V1_INT_HDR_ROWS, V2_INT_FMT, filename)
    int_data = int_data[0:100].astype(np.int32)

    # read in lines of float data
    skip_rows += V1_INT_HDR_ROWS
    flt_data = _read_lines(skip_rows, V1_REAL_HDR_ROWS, V2_REAL_FMT,
                           filename)
    skip_rows += V1_REAL_HDR_ROWS

    # according to the powers that defined the Network.Station.Channel.Location
    # "standard", Location is a two character field. Most data providers,
    # including csmip/dmg here, don't always provide this. We'll flag it as
    # "--".
    hdr = _get_header_info_v1(int_data, flt_data, lines, 'V1',
                              location=location)
    head, tail = os.path.split(filename)
    hdr['standard']['source_file'] = tail or os.path.basename(head)

    # sometimes (??) a line of text is inserted in between the float header and
    # the beginning of the data. Let's check for this...
    with open(filename, 'rt') as f:
        for _ in range(skip_rows):
            next(f)
        test_line = f.readline()
    has_text = re.search('[A-Z]+|[a-z]+', test_line) is not None
    if has_text:
        # Text line present: data is acceleration only, 8 columns of
        # 9-character fields per line.
        skip_rows += 1
        widths = [9] * 8
        max_rows = int(np.ceil(hdr['npts'] / 8))
        data = _read_lines(skip_rows, max_rows, widths, filename)
        acc_data = data[:hdr['npts']]
        evenly_spaced = True
        # Sometimes, npts is incorrectly specified, leading to nans
        # in the resulting data. Fix that here
        if np.any(np.isnan(acc_data)):
            while np.isnan(acc_data[-1]):
                acc_data = acc_data[:-1]
            hdr['npts'] = len(acc_data)
    else:
        # acceleration data is interleaved between time data
        # (5 time/accel pairs per line, 7-character fields).
        max_rows = int(np.ceil(hdr['npts'] / 5))
        widths = [7] * 10
        data = _read_lines(skip_rows, max_rows, widths, filename)
        acc_data = data[1::2][:hdr['npts']]
        times = data[0::2][:hdr['npts']]
        evenly_spaced = is_evenly_spaced(times)

    # Convert to cm/s/s when the recorded unit is one we know.
    if unit in UNIT_CONVERSIONS:
        acc_data *= UNIT_CONVERSIONS[unit]
        logging.debug('Data converted from %s to cm/s/s' % (unit))
    else:
        raise GMProcessException('DMG: %s is not a supported unit.' % unit)

    acc_trace = StationTrace(acc_data.copy(), Stats(hdr.copy()))

    # Check if the times were included in the file but were not evenly spaced
    if not evenly_spaced:
        acc_trace = resample_uneven_trace(acc_trace, times, acc_data)

    response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
    acc_trace.setProvenance('remove_response', response)

    traces = [acc_trace]
    new_offset = skip_rows + max_rows + 1  # there is an end of record line
    return (traces, new_offset)
def read_dmg(filename, **kwargs):
    """Read DMG strong motion file.

    Notes:
        CSMIP is synonymous to as DMG in this reader.

    Args:
        filename (str): Path to possible DMG data file.
        kwargs (ref):
            units (str): String determining which timeseries is return. Valid
                    options include 'acc', 'vel', 'disp'. Default is 'acc'.
            Other arguments will be ignored.

    Returns:
        Stream: Obspy Stream containing three channels of acceleration data
        (cm/s**2).
    """
    logging.debug("Starting read_dmg.")
    if not is_dmg(filename):
        raise Exception('%s is not a valid DMG strong motion data file.'
                        % filename)

    # Check for units and location
    units = kwargs.get('units', 'acc')
    location = kwargs.get('location', '')

    if units not in UNITS:
        raise Exception('DMG: Not a valid choice of units.')

    # Determine volume type from the first line.
    # BUG FIX: the original used ``open(filename).readline()`` without
    # closing the handle (resource leak), redundantly re-checked
    # is_dmg(), and could leave ``reader`` unbound if no volume keyword
    # matched; the unmatched case now raises explicitly.
    with open(filename, 'rt') as f:
        line = f.readline()
    if line.lower().find('uncorrected') >= 0:
        reader = 'V1'
    elif line.lower().find('corrected') >= 0:
        reader = 'V2'
    elif line.lower().find('response') >= 0:
        reader = 'V3'
    else:
        raise GMProcessException('DMG: Not a supported volume.')

    # Count the number of lines in the file
    with open(filename) as f:
        line_count = sum(1 for _ in f)

    # Read as many channels as are present in the file
    line_offset = 0
    trace_list = []
    while line_offset < line_count:
        if reader == 'V2':
            traces, line_offset = _read_volume_two(
                filename, line_offset, location=location, units=units)
            if traces is not None:
                trace_list += traces
        elif reader == 'V1':
            traces, line_offset = _read_volume_one(
                filename, line_offset, location=location, units=units)
            if traces is not None:
                trace_list += traces
        else:
            # V3 (response spectra) files are recognized but not readable.
            raise GMProcessException('DMG: Not a supported volume.')

    stream = StationStream([])
    for trace in trace_list:
        # For our purposes, we only want acceleration, so lets only return
        # that; we may need to change this later if others start using this
        # code and want to read in the other data.
        if trace.stats['standard']['units'] == units:
            stream.append(trace)

    return [stream]
def getInventory(self):
    """
    Extract an ObsPy inventory object from a Stream read in by gmprocess
    tools.
    """
    # All traces must belong to one network for a single-network
    # Inventory to make sense.
    networks = [trace.stats.network for trace in self]
    if len(set(networks)) > 1:
        raise Exception(
            "Input stream has stations from multiple networks.")

    # We'll first create all the various objects. These strongly follow the
    # hierarchy of StationXML files.
    source = ''
    if 'standard' in self[0].stats and 'source' in self[0].stats.standard:
        source = self[0].stats.standard.source
    inv = Inventory(
        # We'll add networks later.
        networks=[],
        # The source should be the id whoever create the file.
        source=source)
    net = Network(
        # This is the network code according to the SEED standard.
        code=networks[0],
        # A list of stations. We'll add one later.
        stations=[],
        # NOTE(review): the literal "source" looks like a placeholder —
        # confirm whether the ``source`` variable was intended here.
        description="source",
        # Start-and end dates are optional.
    )

    # One Channel object per trace, built from the trace metadata.
    channels = []
    for trace in self:
        logging.debug('trace: %s' % trace)
        channel = _channel_from_stats(trace.stats)
        channels.append(channel)

    # StationXML has no field for gmprocess's extra metadata, so stash the
    # unused standard params and format_specific dict as a JSON string in
    # the station description.
    subdict = {}
    for k in UNUSED_STANDARD_PARAMS:
        if k in self[0].stats.standard:
            subdict[k] = self[0].stats.standard[k]

    format_specific = {}
    if 'format_specific' in self[0].stats:
        format_specific = dict(self[0].stats.format_specific)

    big_dict = {'standard': subdict,
                'format_specific': format_specific}
    try:
        jsonstr = json.dumps(big_dict)
    except Exception as e:
        raise GMProcessException('Exception in json.dumps: %s' % e)

    sta = Station(
        # This is the station code according to the SEED standard.
        code=self[0].stats.station,
        latitude=self[0].stats.coordinates.latitude,
        elevation=self[0].stats.coordinates.elevation,
        longitude=self[0].stats.coordinates.longitude,
        channels=channels,
        site=Site(name=self[0].stats.standard.station_name),
        description=jsonstr,
        creation_date=UTCDateTime(1970, 1, 1),  # this is bogus
        total_number_of_channels=len(self))

    net.stations.append(sta)
    inv.networks.append(net)

    return inv
def read_smc(filename, **kwargs):
    """Read SMC strong motion file.

    Args:
        filename (str): Path to possible SMC data file.
        kwargs (ref):
            any_structure (bool): Read data from any type of structure,
                raise Exception if False and structure type is not
                free-field.
            accept_flagged (bool): accept problem flagged data.
            set_location (str): Two character code for location.
            Other arguments will be ignored.

    Returns:
        Stream: Obspy Stream containing one channel of acceleration data
        (cm/s**2).
    """
    logging.debug("Starting read_smc.")
    any_structure = kwargs.get('any_structure', False)
    accept_flagged = kwargs.get('accept_flagged', False)
    location = kwargs.get('location', '')

    if not is_smc(filename):
        raise Exception('%s is not a valid SMC file' % filename)

    # Reject record types this reader does not support.
    with open(filename, 'rt') as f:
        first = f.readline().strip()
    if 'DISPLACEMENT' in first:
        raise GMProcessException(
            'SMC: Diplacement records are not supported: '
            '%s.' % filename)
    elif 'VELOCITY' in first:
        raise GMProcessException(
            'SMC: Velocity records are not supported: '
            '%s.' % filename)
    elif first == "*":
        raise GMProcessException(
            'SMC: No record volume specified in file: '
            '%s.' % filename)

    stats, num_comments = _get_header_info(filename,
                                           any_structure=any_structure,
                                           accept_flagged=accept_flagged,
                                           location=location)

    # Data begins after the ASCII, integer, comment, and float header
    # sections.
    skip = (ASCII_HEADER_LINES + INTEGER_HEADER_LINES
            + num_comments + FLOAT_HEADER_LINES)

    # read float data (8 columns per line)
    npts = stats['npts']
    full_rows = npts // DATA_COLUMNS
    data = np.genfromtxt(filename, max_rows=full_rows, skip_header=skip,
                         delimiter=FLOAT_DATA_WIDTHS).flatten()
    if npts % DATA_COLUMNS:
        # Pick up the final, partially filled row.
        lastrow = np.genfromtxt(filename, max_rows=1,
                                skip_header=skip + full_rows,
                                delimiter=FLOAT_DATA_WIDTHS)
        data = np.append(data, lastrow)
    data = data[0:npts]

    trace = StationTrace(data, header=stats)
    response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
    trace.setProvenance('remove_response', response)
    return [StationStream(traces=[trace])]
def _read_channel(filename, line_offset, location=''):
    """Read channel data from COSMOS V1/V2 text file.

    Args:
        filename (str): Input COSMOS V1/V2 filename.
        line_offset (int): Line offset to beginning of channel text block.
        location (str): Optional location code to pass into the header.

    Returns:
        tuple: (obspy Trace, int line offset)
    """
    # read station, location, and process level from text header
    with open(filename, 'rt') as f:
        for _ in range(line_offset):
            next(f)
        lines = [next(f) for x in range(TEXT_HDR_ROWS)]

    # read in lines of integer data
    skiprows = line_offset + TEXT_HDR_ROWS
    int_lines, int_data = _read_lines(skiprows, filename)
    int_data = int_data.astype(np.int32)

    # read in lines of float data
    skiprows += int_lines + 1
    flt_lines, flt_data = _read_lines(skiprows, filename)

    # read in comment lines
    skiprows += flt_lines + 1
    cmt_lines, cmt_data = _read_lines(skiprows, filename)
    skiprows += cmt_lines + 1

    # according to the powers that defined the Network.Station.Channel.Location
    # "standard", Location is a two character field. Most data providers,
    # including cosmos here, don't provide this. We'll flag it as "--".
    hdr = _get_header_info(int_data, flt_data, lines, cmt_data,
                           location=location)
    head, tail = os.path.split(filename)
    hdr['standard']['source_file'] = tail or os.path.basename(head)

    # read in the data
    nrows, data = _read_lines(skiprows, filename)

    # Check for "off-by-one" problem that sometimes occurs with cosmos data
    # Notes:
    # - We cannot do this check inside _get_header_info because we don't
    #   have the data there.
    # - That method is written to set npts from the header as documented
    #   in the spec ("length" == npts*dt) but it appears that sometimes a
    #   different convention is used where the "length" of the record is
    #   actually (npts-1)*dt. In this case, we need to recompute duration
    #   and npts.
    if hdr['npts'] == (len(data) - 1):
        hdr['npts'] = len(data)
        hdr['duration'] = (hdr['npts'] - 1) * hdr['delta']

    # check units
    unit = hdr['format_specific']['physical_units']
    if unit in UNIT_CONVERSIONS:
        data *= UNIT_CONVERSIONS[unit]
        logging.debug('Data converted from %s to cm/s/s' % (unit))
    else:
        if unit != 'counts':
            raise GMProcessException(
                'COSMOS: %s is not a supported unit.' % unit)

    if hdr['standard']['units'] != 'acc':
        raise GMProcessException('COSMOS: Only acceleration data accepted.')

    trace = StationTrace(data.copy(), Stats(hdr.copy()))

    # record that this data has been converted to g, if it has
    if hdr['standard']['process_level'] != PROCESS_LEVELS['V0']:
        response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
        trace.setProvenance('remove_response', response)

    # set new offset
    new_offset = skiprows + nrows
    new_offset += 1  # there is an 'end of record' line after the data

    return (trace, new_offset)
def addStreams(self, event, streams, label=None):
    """Add a sequence of StationStream objects to an ASDF file.

    Args:
        event (Event):
            Obspy event object.
        streams (list):
            List of StationStream objects.
        label (str):
            Label to attach to stream sequence. Cannot contain an
            underscore.
    """
    # Underscores delimit the tag fields (eventid_station_label), so the
    # label itself must not contain one.
    if label is not None:
        if '_' in label:
            raise GMProcessException(
                'Stream label cannot contain an underscore.')

    # To allow for multiple processed versions of the same Stream
    # let's keep a dictionary of stations and sequence number.
    eventid = _get_id(event)
    if not self.hasEvent(eventid):
        self.addEvent(event)

    station_dict = {}
    for stream in streams:
        station = stream[0].stats['station']
        logging.info('Adding waveforms for station %s' % station)
        # is this a raw file? Check the trace for provenance info.
        is_raw = not len(stream[0].getProvenanceKeys())

        if label is not None:
            tag = '%s_%s_%s' % (eventid, station.lower(), label)
        else:
            # No label given: auto-number repeated streams from the same
            # station so their tags stay unique.
            if station.lower() in station_dict:
                station_sequence = station_dict[station.lower()] + 1
            else:
                station_sequence = 1
            station_dict[station.lower()] = station_sequence
            tag = '%s_%s_%05i' % (
                eventid, station.lower(), station_sequence)

        if is_raw:
            level = 'raw'
        else:
            level = 'processed'
        self.dataset.add_waveforms(stream, tag=tag, event_id=event)

        # add processing provenance info from traces
        if level == 'processed':
            provdocs = stream.getProvenanceDocuments()
            for provdoc, trace in zip(provdocs, stream):
                # Provenance documents are named per channel:
                # <tag>_<network>_<station>_<channel> (all lowercase).
                tpl = (trace.stats.network.lower(),
                       trace.stats.station.lower(),
                       trace.stats.channel.lower())
                channel = '%s_%s_%s' % tpl
                channel_tag = '%s_%s' % (tag, channel)
                self.dataset.add_provenance_document(
                    provdoc,
                    name=channel_tag
                )

        # add processing parameters from streams
        jdict = {}
        for key in stream.getStreamParamKeys():
            value = stream.getStreamParam(key)
            jdict[key] = value
        if len(jdict):
            # NOTE: We would store this dictionary just as
            # the parameters dictionary, but HDF cannot handle
            # nested dictionaries.
            # Also, this seems like a lot of effort
            # just to store a string in HDF, but other
            # approaches failed. Suggestions are welcome.
            jdict = _stringify_dict(jdict)
            jsonbytes = json.dumps(jdict).encode('utf-8')
            jsonarray = np.frombuffer(jsonbytes, dtype=np.uint8)
            dtype = 'StreamProcessingParameters'
            self.dataset.add_auxiliary_data(
                jsonarray,
                data_type=dtype,
                path=tag,
                parameters={}
            )

        # add processing parameters from traces
        for trace in stream:
            path = '%s_%s' % (tag, trace.stats.channel)
            jdict = {}
            for key in trace.getParameterKeys():
                value = trace.getParameter(key)
                jdict[key] = value
            if len(jdict):
                # Same JSON-as-byte-array workaround as above: HDF cannot
                # store nested dictionaries directly.
                jdict = _stringify_dict(jdict)
                jsonbytes = json.dumps(jdict).encode('utf-8')
                jsonarray = np.frombuffer(jsonbytes, dtype=np.uint8)
                dtype = 'TraceProcessingParameters'
                self.dataset.add_auxiliary_data(
                    jsonarray,
                    data_type=dtype,
                    path=path,
                    parameters={}
                )

        # Finally, attach the station metadata for this stream.
        inventory = stream.getInventory()
        self.dataset.add_stationxml(inventory)
def _get_header_info(int_data, flt_data, lines, cmt_data, location=''):
    """Return stats structure from various COSMOS headers.

    Output is a dictionary like this:
     - network (str): Default is '--'. Determined using COSMOS_NETWORKS
     - station (str)
     - channel (str): Determined using COSMOS_ORIENTATIONS
     - location (str): Set to location index of sensor site at station.
       If not a multi-site array, default is '--'.
     - starttime (datetime)
     - duration (float)
     - sampling_rate (float)
     - delta (float)
     - npts (int)
     - coordinates:
       - latitude (float)
       - longitude (float)
       - elevation (float)
     - standard (Defaults are either np.nan or '')
       - horizontal_orientation (float): Rotation from north (degrees)
       - instrument_period (float): Period of sensor (Hz)
       - instrument_damping (float): Fraction of critical
       - process_time (datetime): Reported date of processing
       - process_level: Either 'V0', 'V1', 'V2', or 'V3'
       - station_name (str): Long form station description
       - sensor_serial_number (str): Reported sensor serial
       - instrument (str): See SENSOR_TYPES
       - comments (str): Processing comments
       - structure_type (str): See BUILDING_TYPES
       - corner_frequency (float): Sensor corner frequency (Hz)
       - units (str): See UNITS
       - source (str): Network source description
       - source_format (str): Always cosmos
     - format_specific
       - physical_units (str): See PHYSICAL_UNITS
       - v30 (float): Site geology V30 (km/s)
       - least_significant_bit: Recorder LSB in micro-volts (uv/count)
       - low_filter_type (str): Filter used for low frequency
         V2 filtering (see FILTERS)
       - low_filter_corner (float): Filter corner for low frequency
         V2 filtering (Hz)
       - low_filter_decay (float): Filter decay for low frequency
         V2 filtering (dB/octabe)
       - high_filter_type (str): Filter used for high frequency
         V2 filtering (see FILTERS)
       - high_filter_corner (float): Filter corner for high frequency
         V2 filtering (Hz)
       - high_filter_decay (float): Filter decay for high frequency
         V2 filtering (dB/octabe)
       - maximum (float): Maximum value
       - maximum_time (float): Time at which maximum occurs
       - station_code (int): Code for structure_type
       - record_flag (str): Either 'No problem', 'Fixed', 'Unfixed problem'.
         Should be described in more depth in comments.
       - scaling_factor (float): Scaling used for converting acceleration
         from g/10 to cm/s/s
       - sensor_sensitivity (float): Sensitvity in volts/g

    Args:
        int_data (ndarray): Array of integer data
        flt_data (ndarray): Array of float data
        lines (list): List of text headers (str)
        cmt_data (ndarray): Array of comments (str)

    Returns:
        dictionary: Dictionary of header/metadata information
    """
    hdr = {}
    coordinates = {}
    standard = {}
    format_specific = {}

    # Get the "unknown/missing" sentinel value declared by the file itself
    # (text header line 13); fall back to -999 if it is not parseable.
    try:
        unknown = int(lines[12][64:71])
    except ValueError:
        unknown = -999

    # required metadata
    network_num = int(int_data[10])
    # Get network from cosmos table or fdsn code sheet
    if network_num in COSMOS_NETWORKS:
        network = COSMOS_NETWORKS[network_num][0]
        source = COSMOS_NETWORKS[network_num][1]
        if network == '':
            network = COSMOS_NETWORKS[network_num][2]
    else:
        # Not in the COSMOS table: try the two-letter code embedded in the
        # text header against the FDSN code sheet arrays.
        network_code = lines[4][25:27].upper()
        if network_code in CODES:
            network = network_code
            idx = np.argwhere(CODES == network_code)[0][0]
            source = SOURCES1[idx].decode(
                'utf-8') + ', ' + SOURCES2[idx].decode('utf-8')
        else:
            network = 'ZZ'
            source = ''
    hdr['network'] = network
    logging.debug('network: %s' % network)
    hdr['station'] = lines[4][28:34].strip()
    logging.debug('station: %s' % hdr['station'])

    horizontal_angle = int(int_data[53])
    logging.debug('horizontal_angle: %s' % horizontal_angle)
    if horizontal_angle not in VALID_AZIMUTH_INTS:
        logging.warning("Horizontal_angle in COSMOS header is not valid.")
    horizontal_angle = float(horizontal_angle)

    # Store delta and duration. Use them to calculate npts and sampling_rate
    # NOTE: flt_data[33] is the delta of the V0 format, and if we are reading
    # a V1 or V2 format then it may have been resampled. We should consider
    # adding flt_data[33] delta to the provenance record at some point.
    delta = float(flt_data[61]) * MSEC_TO_SEC
    if delta != unknown:
        hdr['delta'] = delta
        hdr['sampling_rate'] = 1 / delta

    # Determine the angle based upon the cosmos table
    # Set horizontal angles other than N,S,E,W to H1 and H2
    # Missing angle results in the channel number
    if horizontal_angle != unknown:
        if horizontal_angle in COSMOS_ORIENTATIONS:
            channel = COSMOS_ORIENTATIONS[horizontal_angle][1].upper()
            if channel == 'UP' or channel == 'DOWN' or channel == 'VERT':
                channel = get_channel_name(hdr['sampling_rate'],
                                           is_acceleration=True,
                                           is_vertical=True,
                                           is_north=False)
        elif horizontal_angle >= 0 and horizontal_angle <= 360:
            # Azimuths within 45 degrees of north/south are treated as the
            # "north" component; everything else as "east".
            if (horizontal_angle > 315 or horizontal_angle < 45 or
                    (horizontal_angle > 135 and horizontal_angle < 225)):
                channel = get_channel_name(hdr['sampling_rate'],
                                           is_acceleration=True,
                                           is_vertical=False,
                                           is_north=True)
            else:
                channel = get_channel_name(hdr['sampling_rate'],
                                           is_acceleration=True,
                                           is_vertical=False,
                                           is_north=False)
        horizontal_orientation = horizontal_angle
    else:
        errstr = ('Not enough information to distinguish horizontal from '
                  'vertical channels.')
        raise GMProcessException('COSMOS: ' + errstr)
    hdr['channel'] = channel
    logging.debug('channel: %s' % hdr['channel'])

    # Location: caller-supplied value wins; otherwise use the station's
    # sensor-site index, zero-padded to the two characters SEED expects.
    if location == '':
        location = int(int_data[55])
        location = str(_check_assign(location, unknown, '--'))
        if len(location) < 2:
            location = location.zfill(2)
        hdr['location'] = location
    else:
        hdr['location'] = location

    year = int(int_data[39])
    month = int(int_data[41])
    day = int(int_data[42])
    hour = int(int_data[43])
    minute = int(int_data[44])
    second = float(flt_data[29])
    # If anything more than seconds is excluded
    # It is considered inadequate time information
    if second == unknown:
        try:
            hdr['starttime'] = datetime(year, month, day, hour, minute)
        except Exception:
            raise GMProcessException(
                'COSMOS: Inadequate start time information.')
    else:
        second = second
        microsecond = int((second - int(second)) * 1e6)
        try:
            hdr['starttime'] = datetime(
                year, month, day, hour, minute, int(second), microsecond)
        except Exception:
            raise GMProcessException(
                'COSMOS: Inadequate start time information.')

    if flt_data[62] != unknown:
        # COSMOS **defines** "length" as npts*dt (note this is a bit unusual)
        cosmos_length = flt_data[62]
        npts = int(cosmos_length / delta)
        hdr['duration'] = (npts - 1) * delta
        hdr['npts'] = npts
    else:
        raise ValueError('COSMOS file does not specify length.')

    # coordinate information
    coordinates['latitude'] = float(flt_data[0])
    coordinates['longitude'] = float(flt_data[1])
    coordinates['elevation'] = float(flt_data[2])
    for key in coordinates:
        if coordinates[key] == unknown:
            warnings.warn('Missing %r. Setting to np.nan.' % key, Warning)
            coordinates[key] = np.nan
    hdr['coordinates'] = coordinates

    # standard metadata
    standard['source'] = source
    standard['horizontal_orientation'] = horizontal_orientation
    station_name = lines[4][40:-1].strip()
    standard['station_name'] = station_name
    instrument_frequency = float(flt_data[39])
    # Header stores frequency; the stats structure wants period (1/f).
    standard['instrument_period'] = 1.0 / _check_assign(
        instrument_frequency, unknown, np.nan)
    instrument_damping = float(flt_data[40])
    standard['instrument_damping'] = _check_assign(
        instrument_damping, unknown, np.nan)

    # Processing time lives as free text in header line 11; try to parse a
    # '-' or '/' delimited date plus a ':' delimited time, else leave empty.
    process_line = lines[10][10:40]
    if process_line.find('-') >= 0 or process_line.find('/') >= 0:
        if process_line.find('-') >= 0:
            delimeter = '-'
        elif process_line.find('/') >= 0:
            delimeter = '/'
        try:
            date = process_line.split(delimeter)
            month = int(date[0][-2:])
            day = int(date[1])
            year = int(date[2][:4])
            time = process_line.split(':')
            hour = int(time[0][-2:])
            minute = int(time[1])
            second = float(time[2][:2])
            microsecond = int((second - int(second)) * 1e6)
            etime = datetime(year, month, day, hour, minute,
                             int(second), microsecond)
            standard['process_time'] = etime.strftime(TIMEFMT)
        except Exception:
            standard['process_time'] = ''
    else:
        standard['process_time'] = ''

    # Map the numeric process level to V0..V3; unrecognized values fall
    # back to V1.
    process_level = int(int_data[0])
    if process_level == 0:
        standard['process_level'] = PROCESS_LEVELS['V0']
    elif process_level == 1:
        standard['process_level'] = PROCESS_LEVELS['V1']
    elif process_level == 2:
        standard['process_level'] = PROCESS_LEVELS['V2']
    elif process_level == 3:
        standard['process_level'] = PROCESS_LEVELS['V3']
    else:
        standard['process_level'] = PROCESS_LEVELS['V1']
    logging.debug("process_level: %s" % process_level)

    serial = int(int_data[52])
    if serial != unknown:
        standard['sensor_serial_number'] = str(
            _check_assign(serial, unknown, ''))
    else:
        standard['sensor_serial_number'] = ''

    instrument = int(int_data[51])
    if instrument != unknown and instrument in SENSOR_TYPES:
        standard['instrument'] = SENSOR_TYPES[instrument]
    else:
        # Unknown code: fall back to the free-text description in line 7.
        standard['instrument'] = lines[6][57:-1].strip()

    structure_type = int(int_data[18])
    if structure_type != unknown and structure_type in BUILDING_TYPES:
        standard['structure_type'] = BUILDING_TYPES[structure_type]
    else:
        standard['structure_type'] = ''

    frequency = float(flt_data[25])
    standard['corner_frequency'] = _check_assign(frequency, unknown, np.nan)

    # Units: prefer the explicit units code; otherwise infer acc/vel/disp
    # from the physical-parameter code groups.
    physical_parameter = int(int_data[2])
    units = int(int_data[1])
    if units != unknown and units in UNITS:
        standard['units'] = UNITS[units]
    else:
        if physical_parameter in [2, 4, 7, 10, 11, 12, 23]:
            standard['units'] = 'acc'
        elif physical_parameter in [5, 8, 24]:
            standard['units'] = 'vel'
        elif physical_parameter in [6, 9, 25]:
            standard['units'] = 'disp'
    standard['source_format'] = 'cosmos'
    standard['comments'] = ', '.join(cmt_data)

    # format specific metadata
    if physical_parameter in PHYSICAL_UNITS:
        physical_parameter = PHYSICAL_UNITS[physical_parameter][0]
    format_specific['physical_units'] = physical_parameter
    v30 = float(flt_data[3])
    format_specific['v30'] = _check_assign(v30, unknown, np.nan)
    least_significant_bit = float(flt_data[21])
    format_specific['least_significant_bit'] = _check_assign(
        least_significant_bit, unknown, np.nan)
    low_filter_type = int(int_data[60])
    if low_filter_type in FILTERS:
        format_specific['low_filter_type'] = FILTERS[low_filter_type]
    else:
        format_specific['low_filter_type'] = ''
    low_filter_corner = float(flt_data[53])
    format_specific['low_filter_corner'] = _check_assign(
        low_filter_corner, unknown, np.nan)
    low_filter_decay = float(flt_data[54])
    format_specific['low_filter_decay'] = _check_assign(
        low_filter_decay, unknown, np.nan)
    high_filter_type = int(int_data[61])
    if high_filter_type in FILTERS:
        format_specific['high_filter_type'] = FILTERS[high_filter_type]
    else:
        format_specific['high_filter_type'] = ''
    high_filter_corner = float(flt_data[56])
    format_specific['high_filter_corner'] = _check_assign(
        high_filter_corner, unknown, np.nan)
    high_filter_decay = float(flt_data[57])
    format_specific['high_filter_decay'] = _check_assign(
        high_filter_decay, unknown, np.nan)
    maximum = float(flt_data[63])
    format_specific['maximum'] = _check_assign(maximum, unknown, np.nan)
    maximum_time = float(flt_data[64])
    format_specific['maximum_time'] = _check_assign(
        maximum_time, unknown, np.nan)
    format_specific['station_code'] = _check_assign(
        structure_type, unknown, np.nan)
    record_flag = int(int_data[75])
    if record_flag == 0:
        format_specific['record_flag'] = 'No problem'
    elif record_flag == 1:
        format_specific['record_flag'] = 'Fixed'
    elif record_flag == 2:
        format_specific['record_flag'] = 'Unfixed problem'
    else:
        format_specific['record_flag'] = ''
    scaling_factor = float(flt_data[87])
    format_specific['scaling_factor'] = _check_assign(
        scaling_factor, unknown, np.nan)
    scaling_factor = float(flt_data[41])
    format_specific['sensor_sensitivity'] = _check_assign(
        scaling_factor, unknown, np.nan)

    # Set dictionary
    hdr['standard'] = standard
    hdr['coordinates'] = coordinates
    hdr['format_specific'] = format_specific
    return hdr
def _read_channel(filename, line_offset, location=''):
    """Read one channel block from a COSMOS V1/V2 text file.

    Args:
        filename (str):
            Input COSMOS V1/V2 filename.
        line_offset (int):
            Line offset to beginning of channel text block.
        location (str):
            Optional location code override.

    Returns:
        tuple: (obspy Trace, int line offset of the next channel block)
    """
    # Pull out the text header lines for this channel.
    with open(filename, 'rt') as fobj:
        for _ in range(line_offset):
            next(fobj)
        text_header = [next(fobj) for _ in range(TEXT_HDR_ROWS)]

    # Integer header block follows the text header.
    cursor = line_offset + TEXT_HDR_ROWS
    int_lines, int_data = _read_lines(cursor, filename)
    int_data = int_data.astype(np.int32)

    # Float header block comes next (plus its one-line section header).
    cursor += int_lines + 1
    flt_lines, flt_data = _read_lines(cursor, filename)

    # Then the comment block.
    cursor += flt_lines + 1
    cmt_lines, cmt_data = _read_lines(cursor, filename)
    cursor += cmt_lines + 1

    # according to the powers that defined the Network.Station.Channel.Location
    # "standard", Location is a two character field. Most data providers,
    # including cosmos here, don't provide this. We'll flag it as "--".
    hdr = _get_header_info(int_data, flt_data, text_header, cmt_data,
                           location=location)
    head, tail = os.path.split(filename)
    hdr['standard']['source_file'] = tail or os.path.basename(head)

    # Finally, the sample values themselves.
    nrows, data = _read_lines(cursor, filename)

    # Convert to cm/s/s; bail out on units we have no conversion for.
    unit = hdr['format_specific']['physical_units']
    if unit not in UNIT_CONVERSIONS:
        raise GMProcessException('COSMOS: %s is not a supported unit.'
                                 % unit)
    data *= UNIT_CONVERSIONS[unit]
    logging.debug('Data converted from %s to cm/s/s' % (unit))

    if hdr['standard']['units'] != 'acc':
        raise GMProcessException('COSMOS: Only acceleration data accepted.')

    trace = StationTrace(data.copy(), Stats(hdr.copy()))

    # record that this data has been converted to g, if it has
    if hdr['standard']['process_level'] != PROCESS_LEVELS['V0']:
        response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
        trace.setProvenance('remove_response', response)

    # Offset of the next channel: skip the data plus the trailing
    # 'end of record' line.
    next_offset = cursor + nrows + 1
    return (trace, next_offset)
def _read_channel(filename, line_offset, volume, location='', alternate=False):
    """Read channel data from USC V1 text file.

    Args:
        filename (str):
            Input USC V1 filename.
        line_offset (int):
            Line offset to beginning of channel text block.
        volume (dictionary):
            Dictionary of formatting information (header row counts and
            fixed-width column formats).
        location (str):
            Optional location code override.
        alternate (bool):
            Use the alternate fixed-width layout (5 integer-header rows of
            20 4-char columns, 8 data columns) instead of the layout in
            `volume`.

    Returns:
        tuple: (obspy Trace, int line offset)
    """
    if alternate:
        int_rows = 5
        int_fmt = 20 * [4]
        data_cols = 8
    else:
        int_rows = volume['INT_HDR_ROWS']
        int_fmt = volume['INT_FMT']
        data_cols = 10

    # Parse the header portion of the file
    try:
        with open(filename, 'rt') as f:
            for _ in range(line_offset):
                next(f)
            lines = [next(f) for x in range(volume['TEXT_HDR_ROWS'])]
    # Accounts for blank lines at end of files: a truncated header means we
    # have run off the end, so signal "no more channels" with a None trace.
    except StopIteration:
        return (None, 1 + line_offset)

    # read in lines of integer data; the delimiter list gives fixed column
    # widths to np.genfromtxt.
    skiprows = line_offset + volume['TEXT_HDR_ROWS']
    int_data = np.genfromtxt(filename, skip_header=skiprows,
                             max_rows=int_rows, dtype=np.int32,
                             delimiter=int_fmt).flatten()

    # read in lines of float data
    skiprows += int_rows
    flt_data = np.genfromtxt(filename, skip_header=skiprows,
                             max_rows=volume['FLT_HDR_ROWS'],
                             dtype=np.float64,
                             delimiter=volume['FLT_FMT']).flatten()
    # NOTE(review): the 'V1' positional argument fills the slot other
    # readers use for comment data — confirm against this module's
    # _get_header_info signature.
    hdr = _get_header_info(int_data, flt_data, lines, 'V1',
                           location=location)
    skiprows += volume['FLT_HDR_ROWS']

    # read in the data: rows hold interleaved (time, value) pairs, so the
    # row count is npts*2 divided by the number of columns per row.
    nrows = int(np.floor(hdr['npts'] * 2 / data_cols))
    all_data = np.genfromtxt(filename, skip_header=skiprows,
                             max_rows=nrows, dtype=np.float64,
                             delimiter=volume['COL_FMT'])
    # De-interleave: even positions are times, odd positions are samples.
    data = all_data.flatten()[1::2]
    times = all_data.flatten()[0::2]

    # Convert amplitudes to cm/s/s, either via the file's fractional unit
    # of g or by parsing the unit string from header line 12.
    frac = hdr['format_specific']['fractional_unit']
    if frac > 0:
        data *= UNIT_CONVERSIONS['g'] * frac
        logging.debug('Data converted from g * %s to cm/s/s' % (frac))
    else:
        unit = _get_units(lines[11])
        if unit in UNIT_CONVERSIONS:
            data *= UNIT_CONVERSIONS[unit]
            logging.debug('Data converted from %s to cm/s/s' % (unit))
        else:
            raise GMProcessException('USC: %s is not a supported unit.'
                                     % unit)

    # Put file name into dictionary
    head, tail = os.path.split(filename)
    hdr['standard']['source_file'] = tail or os.path.basename(head)

    trace = StationTrace(data.copy(), Stats(hdr.copy()))
    # Uneven sample spacing is repaired by resampling against the recorded
    # time values.
    if not is_evenly_spaced(times):
        trace = resample_uneven_trace(trace, times, data)

    response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
    trace.setProvenance('remove_response', response)

    # set new offset
    new_offset = skiprows + nrows
    new_offset += 1  # there is an 'end of record' line after the data

    return (trace, new_offset)
def group_channels(streams):
    """Consolidate streams for the same event.

    Checks to see if there are channels for one station in different
    streams, and groups them into one stream. Then streams are checked for
    duplicate channels (traces). Comparison is O(n^2) over all traces.

    Args:
        streams (list): List of Stream objects.

    Returns:
        list: List of Stream objects.
    """
    # Return the original stream if there is only one
    if len(streams) <= 1:
        return streams

    # Flatten all streams into a single trace list, normalizing blank/NaN
    # network and location codes as we go ('ZZ' and '--' respectively).
    trace_list = []
    for stream in streams:
        for trace in stream:
            if trace.stats.network == '' or str(trace.stats.network) == 'nan':
                trace.stats.network = 'ZZ'
            if str(trace.stats.location) == 'nan':
                trace.stats.location = ''
            if trace.stats.location == '' or str(
                    trace.stats.location) == 'nan':
                trace.stats.location = '--'
            trace_list += [trace]

    # Create a list of duplicate traces and event matches.
    # duplicate_list holds indices of traces that are exact copies of an
    # earlier trace; match_list[i] holds indices of traces that belong to
    # the same station/event as trace i (and should be grouped with it).
    duplicate_list = []
    match_list = []
    for idx1, trace1 in enumerate(trace_list):
        matches = []
        network = trace1.stats['network']
        station = trace1.stats['station']
        starttime = trace1.stats['starttime']
        endtime = trace1.stats['endtime']
        channel = trace1.stats['channel']
        location = trace1.stats['location']
        if 'units' in trace1.stats.standard:
            units = trace1.stats.standard['units']
        else:
            units = ''
        if 'process_level' in trace1.stats.standard:
            process_level = trace1.stats.standard['process_level']
        else:
            process_level = ''
        data = np.asarray(trace1.data)
        for idx2, trace2 in enumerate(trace_list):
            # Skip comparison if trace1 is itself already flagged as a
            # duplicate of something earlier.
            if idx1 != idx2 and idx1 not in duplicate_list:
                event_match = False
                duplicate = False
                # Element-wise comparison only makes sense for same-shape
                # arrays; the AttributeError branch handles the scalar
                # result of comparing 0-d data.
                if data.shape == trace2.data.shape:
                    try:
                        same_data = ((data == np.asarray(trace2.data)).all())
                    except AttributeError:
                        same_data = (data == np.asarray(trace2.data))
                else:
                    same_data = False
                if 'units' in trace2.stats.standard:
                    units2 = trace2.stats.standard['units']
                else:
                    units2 = ''
                if 'process_level' in trace2.stats.standard:
                    process_level2 = trace2.stats.standard['process_level']
                else:
                    process_level2 = ''
                # Duplicate: every identifying field AND the sample data
                # agree. Event match: same station/start/location/units/
                # process level but a different channel (or data).
                if (network == trace2.stats['network'] and
                        station == trace2.stats['station'] and
                        starttime == trace2.stats['starttime'] and
                        endtime == trace2.stats['endtime'] and
                        channel == trace2.stats['channel'] and
                        location == trace2.stats['location'] and
                        units == units2 and
                        process_level == process_level2 and
                        same_data):
                    duplicate = True
                elif (network == trace2.stats['network'] and
                        station == trace2.stats['station'] and
                        starttime == trace2.stats['starttime'] and
                        location == trace2.stats['location'] and
                        units == units2 and
                        process_level == process_level2):
                    event_match = True
                if duplicate:
                    duplicate_list += [idx2]
                if event_match:
                    matches += [idx2]
        match_list += [matches]

    # Create an updated list of streams: each trace plus its matches
    # becomes one Stream; duplicate_list doubles as the "already consumed"
    # set from here on.
    streams = []
    for idx, matches in enumerate(match_list):
        stream = Stream()
        grouped = False
        for match_idx in matches:
            if match_idx not in duplicate_list:
                if idx not in duplicate_list:
                    stream.append(trace_list[match_idx])
                    duplicate_list += [match_idx]
                    grouped = True
        if grouped:
            stream.append(trace_list[idx])
            duplicate_list += [idx]
            streams += [stream]

    # Check for ungrouped traces: anything not consumed above becomes a
    # single-channel stream (Stream.append returns the stream itself).
    for idx, trace in enumerate(trace_list):
        if idx not in duplicate_list:
            stream = Stream()
            streams += [stream.append(trace)]
            logging.warning('One channel stream:\n%s' % (stream))

    # Check for streams with more than three channels
    for stream in streams:
        if len(stream) > 3:
            raise GMProcessException('Stream with more than 3 channels:\n%s.'
                                     % (stream))
    return streams
def get_records(
        output,
        email,
        unpack=False,
        process_level='raw',
        group_by='event',
        minpga=None,
        maxpga=None,
        min_station_dist=None,
        max_station_dist=None,
        network=None,
        station_type='Ground',
        include_inactive=False,
        station_name=None,
        min_station_latitude=None,
        max_station_latitude=None,
        min_station_longitude=None,
        max_station_longitude=None,
        station_latitude=None,
        station_longitude=None,
        radius_km=None,
        station_code=None,
        event_name=None,
        minmag=None,
        maxmag=None,
        fault_type=None,
        startdate=None,
        enddate=None,
        min_event_latitude=None,
        max_event_latitude=None,
        min_event_longitude=None,
        max_event_longitude=None,
        event_latitude=None,
        event_longitude=None,
        event_radius=None,
        eventid=None,
):
    """Retrieve strong motion waveform records from CESMD website.

    Args:
        output (str): Filename or directory where downloaded zip data will
            be written.
        email (str): Email address of requesting user.
        unpack (bool): If True, all zipped files will be unpacked (output
            will become a directory name.)
        process_level (str): One of 'raw', 'processed', 'plots'.
        group_by (str): One of 'event', 'station'.
        minpga (float): Minimum PGA value.
        maxpga (float): Maximum PGA value.
        min_station_dist (float): Minimum station distance from epicenter.
        max_station_dist (float): Maximum station distance from epicenter.
        network (str): Source network of strong motion data.
        station_type (str): Type of strong motion station (array, dam, etc.)
        include_inactive (bool): Include results from stations that are
            no longer active.
        station_name (str): Search only for station matching input name.
        min_station_latitude (float): Latitude station min when using a
            box search.
        max_station_latitude (float): Latitude station max when using a
            box search.
        min_station_longitude (float): Longitude station min when using a
            box search.
        max_station_longitude (float): Longitude station max when using a
            box search.
        station_latitude (float): Center latitude for station search.
        station_longitude (float): Center longitude for station search.
        radius_km (float): Radius (km) for station search.
        station_code (str): Particular station code to search for.
        event_name (str): Earthquake name to search for.
        minmag (float): Magnitude minimum when using a magnitude search.
        maxmag (float): Magnitude maximum when using a magnitude search.
        fault_type (str): Fault type.
        startdate (str): Start date/time in YYYY-MM-DD HH:MM:SS format.
        enddate (str): End date/time in YYYY-MM-DD HH:MM:SS format.
        min_event_latitude (float): Latitude event min when using a box
            search.
        max_event_latitude (float): Latitude event max when using a box
            search.
        min_event_longitude (float): Longitude event min when using a box
            search.
        max_event_longitude (float): Longitude event max when using a box
            search.
        event_latitude (float): Center earthquake latitude for radius
            search.
        event_longitude (float): Center earthquake longitude for radius
            search.
        event_radius (float): Earthquake search radius (km).
        eventid (str): NEIC or other ANSS event ID.

    Returns:
        tuple: (Top level output directory, list of data files)

    Raises:
        KeyError: For unsupported process_level/group_by/network/
            station_type/fault_type values.
        GMProcessException: If the web service returns a non-200 response.
    """
    # getting the inputargs must be the first line of the method!
    # locals() captures every parameter; output/unpack are local concerns
    # and must not be sent to the web service.
    inputargs = locals().copy()
    del inputargs['output']
    del inputargs['unpack']

    # Validate the closed-set arguments before touching the network.
    if process_level not in PROCESS_LEVELS:
        fmt = 'Only process levels of %s are supported (%s was input)'
        tpl = (','.join(PROCESS_LEVELS), process_level)
        raise KeyError(fmt % tpl)
    if group_by not in GROUP_OPTIONS:
        # BUGFIX: this message previously said "process levels" (copy-paste
        # from the check above), which mislabeled the failing argument.
        fmt = 'Only group_by options of %s are supported (%s was input)'
        tpl = (','.join(GROUP_OPTIONS), group_by)
        raise KeyError(fmt % tpl)

    # determine which network user wanted
    if network is not None and network not in NETWORKS:
        fmt = 'Network with ID %s not found in list of supported networks.'
        raise KeyError(fmt % network)
    if station_type is not None and station_type not in STATION_TYPES:
        fmt = 'Station type %s not found in list of supported types.'
        raise KeyError(fmt % station_type)
    # convert 'Ground' to 'G' for example.
    # BUGFIX: only translate when a station type was given; previously
    # station_type=None slipped past the guard above and raised a bare
    # KeyError on the dictionary lookup here.
    if inputargs['station_type'] is not None:
        inputargs['station_type'] = STATION_TYPES[inputargs['station_type']]

    # check against list of fault types
    if fault_type is not None and fault_type not in FAULT_TYPES:
        fmt = 'Fault type %s not found in supported fault types %s.'
        tpl = (fault_type, ','.join(FAULT_TYPES))
        raise KeyError(fmt % tpl)

    # make sure there is only one method being used to select stations
    # geographically
    if min_station_latitude is not None and station_latitude is not None:
        raise Exception(
            'Select stations either by bounding box or by radius, not both.')

    # make sure there is only one method being used to select events
    # geographically
    if min_event_latitude is not None and event_latitude is not None:
        raise Exception(
            'Select events either by bounding box or by radius, not both.')

    # now convert process levels to string webservice expects
    levels = {'processed': 'P',
              'raw': 'R',
              'plots': 'T',
              'all': 'P,R,T'}
    inputargs['process_level'] = levels[process_level]

    # now convert input args to the parameter names the webservice expects
    params = {}
    for key, value in inputargs.items():
        params[KEY_TABLE.get(key, key)] = value

    # convert all booleans to strings that are 'true' and 'false'
    for key, value in params.items():
        if isinstance(value, bool):
            params[key] = 'true' if value else 'false'

    # add in a couple of parameters that seem to be required
    params['orderby'] = 'epidist-asc'
    params['nodata'] = '404'
    params['rettype'] = 'dataset'

    session = Session()
    request = Request('GET', URL_TEMPLATE, params=params).prepare()
    url = request.url
    response = session.get(request.url)
    if not response.status_code == 200:
        fmt = 'Your url "%s" returned a status code of %i with message: "%s"'
        raise GMProcessException(
            fmt % (url, response.status_code, response.reason))

    if unpack:
        if not os.path.exists(output):
            os.makedirs(output)
        fbytes = io.BytesIO(response.content)
        myzip = zipfile.ZipFile(fbytes, mode='r')
        members = myzip.namelist()
        for member in members:
            finfo = myzip.getinfo(member)
            if finfo.is_dir():
                continue
            if not member.lower().endswith('.zip'):
                # Plain member: flatten its path and copy it out.
                # Handles opened via `with` so they are closed even if the
                # write fails (previously leaked on exception).
                flatfile = member.replace('/', '_')
                outfile = os.path.join(output, flatfile)
                with myzip.open(member) as fin:
                    with open(outfile, 'wb') as fout:
                        fout.write(fin.read())
            else:
                # Nested zip: unpack its members into a directory named
                # after the outer member.
                zfiledata = io.BytesIO(myzip.read(member))
                try:
                    tmpzip = zipfile.ZipFile(zfiledata, mode='r')
                    tmp_members = tmpzip.namelist()
                    for tmp_member in tmp_members:
                        tfinfo = tmpzip.getinfo(tmp_member)
                        if not tfinfo.is_dir():
                            flatfile = tmp_member.replace('/', '_')
                            parent, _ = os.path.splitext(member)
                            parent = parent.replace('/', '_')
                            # sometimes the member ends with .zip.zip (??)
                            parent = parent.replace('.zip', '')
                            datadir = os.path.join(output, parent)
                            if not os.path.exists(datadir):
                                os.makedirs(datadir)
                            outfile = os.path.join(datadir, flatfile)
                            with tmpzip.open(tmp_member) as fin:
                                with open(outfile, 'wb') as fout:
                                    fout.write(fin.read())
                    tmpzip.close()
                    zfiledata.close()
                except Exception as e:
                    # Best-effort: a corrupt sub-zip should not abort the
                    # whole download. Logged (was print) for consistency
                    # with the rest of the module.
                    fmt = ('Could not unpack sub-zip file "%s" due to '
                           'error "%s". Skipping.')
                    logging.warning(fmt % (member, str(e)))
                    continue
        myzip.close()

        # Collect everything we unpacked, excluding metadata JSON files.
        datafiles = []
        for root, _dirs, files in os.walk(output):
            for tfile in files:
                if not tfile.endswith('.json'):
                    datafile = os.path.join(root, tfile)
                    datafiles.append(datafile)
        return (os.path.abspath(output), datafiles)
    else:
        # Not unpacking: write the raw zip bytes to `output`.
        if not output.endswith('.zip'):
            output += '.zip'
        with open(output, 'wb') as f:
            f.write(response.content)
        return (output, [])
def addStreams(self, event, streams, label=None):
    """Add a sequence of StationStream objects to an ASDF file.

    Args:
        event (Event):
            Obspy event object.
        streams (list):
            List of StationStream objects.
        label (str):
            Label to attach to stream sequence. Cannot contain an
            underscore. If None, a timestamped default of the form
            'processedYYYYMMDDHHMMSS' is generated on the first stream
            and reused for the rest of this call.
    """
    if label is not None:
        if '_' in label:
            # Tags are formed as '<eventid>_<label>', so an underscore in
            # the label would break later tag parsing.
            raise GMProcessException(
                'Stream label cannot contain an underscore.')

    # Make sure the event exists in the file before attaching waveforms.
    eventid = _get_id(event)
    if not self.hasEvent(eventid):
        self.addEvent(event)

    for stream in streams:
        station = stream[0].stats['station']
        logging.info('Adding waveforms for station %s' % station)

        # is this a raw file? Check the first trace for provenance info;
        # raw data has no provenance entries yet.
        is_raw = not len(stream[0].getProvenanceKeys())

        if label is None:
            # Build the default label once; `label` is rebound here so all
            # subsequent streams in this call share the same timestamp.
            tfmt = '%Y%m%d%H%M%S'
            tnow = UTCDateTime.now().strftime(tfmt)
            label = 'processed%s' % tnow
        tag = '{}_{}'.format(eventid, label)

        if is_raw:
            level = 'raw'
        else:
            level = 'processed'

        # NOTE(review): event_id is given the full Event object here;
        # presumably pyasdf resolves it to the event's resource id —
        # confirm against the pyasdf add_waveforms API.
        self.dataset.add_waveforms(stream, tag=tag, event_id=event)

        # Add processing provenance info from traces (processed data only;
        # one provenance document per channel).
        if level == 'processed':
            provdocs = stream.getProvenanceDocuments()
            for provdoc, trace in zip(provdocs, stream):
                provname = format_nslct(trace.stats, tag)
                self.dataset.add_provenance_document(
                    provdoc,
                    name=provname
                )

        # Add processing parameters attached at the stream level.
        jdict = {}
        for key in stream.getStreamParamKeys():
            value = stream.getStreamParam(key)
            jdict[key] = value
        if len(jdict):
            # NOTE: We would store this dictionary just as
            # the parameters dictionary, but HDF cannot handle
            # nested dictionaries.
            # Also, this seems like a lot of effort
            # just to store a string in HDF, but other
            # approached failed. Suggestions are welcome.
            # The dict is serialized to JSON and stored as a uint8 array.
            jdict = _stringify_dict(jdict)
            jsonbytes = json.dumps(jdict).encode('utf-8')
            jsonarray = np.frombuffer(jsonbytes, dtype=np.uint8)
            dtype = 'StreamProcessingParameters'
            parampath = '/'.join([
                format_netsta(stream[0].stats),
                format_nslit(stream[0].stats, stream.get_inst(), tag)
            ])
            self.dataset.add_auxiliary_data(
                jsonarray,
                data_type=dtype,
                path=parampath,
                parameters={}
            )

        # Add processing parameters attached to individual traces.
        for trace in stream:
            procname = '/'.join([format_netsta(trace.stats),
                                 format_nslct(trace.stats, tag),
                                 ])
            jdict = {}
            for key in trace.getParameterKeys():
                value = trace.getParameter(key)
                jdict[key] = value
            if len(jdict):
                # NOTE: We would store this dictionary just as
                # the parameters dictionary, but HDF cannot handle
                # nested dictionaries.
                # Also, this seems like a lot of effort
                # just to store a string in HDF, but other
                # approached failed. Suggestions are welcome.
                jdict = _stringify_dict(jdict)
                jsonbytes = json.dumps(jdict).encode('utf-8')
                jsonarray = np.frombuffer(jsonbytes, dtype=np.uint8)
                dtype = 'TraceProcessingParameters'
                self.dataset.add_auxiliary_data(
                    jsonarray,
                    data_type=dtype,
                    path=procname,
                    parameters={}
                )

            # Some processing data is computationally intensive to
            # compute, so we store it in the 'Cache' group.
            for specname in trace.getCachedNames():
                spectrum = trace.getCached(specname)
                # we expect many of these specnames to
                # be joined with underscores; CamelCase them for the
                # auxiliary-data type name.
                name_parts = specname.split('_')
                base_dtype = ''.join([part.capitalize()
                                      for part in name_parts])
                for array_name, array in spectrum.items():
                    path = base_dtype + array_name.capitalize() \
                        + "/" + procname
                    try:
                        self.dataset.add_auxiliary_data(
                            array,
                            data_type='Cache',
                            path=path,
                            parameters={}
                        )
                    except Exception as e:
                        # NOTE(review): failures here are silently
                        # swallowed; consider at least logging `e` so
                        # missing cache entries are diagnosable.
                        pass

        # Station metadata (StationXML) for this stream.
        inventory = stream.getInventory()
        self.dataset.add_stationxml(inventory)
def _get_header_info(filename, any_structure=False, accept_flagged=False,
                     location=''):
    """Return stats structure from various headers of an SMC file.

    Output is a dictionary like this:
     - network
     - station
     - channel
     - location (str): Set to floor the sensor is located on. If not a
       multi-sensor array, default is '--'. Can be set manually by the user.
     - starttime
     - sampling_rate
     - npts
     - coordinates:
       - latitude
       - longitude
       - elevation
     - standard
       - horizontal_orientation
       - instrument_period
       - instrument_damping
       - process_level
       - station_name
       - sensor_serial_number
       - instrument
       - comments
       - structure_type
       - corner_frequency
       - units
       - source
       - source_format
     - format_specific
       - vertical_orientation
       - building_floor (0=basement, 1=floor above basement,
         -1=1st sub-basement, etc.)
       - bridge_number_spans
       - bridge_transducer_location ("free field",
         "at the base of a pier or abutment", "on an abutment",
         "on the deck at the top of a pier",
         "on the deck between piers or between an abutment and a pier.")
       - dam_transducer_location ("upstream or downstream free field",
         "at the base of the dam", "on the crest of the dam",
         "on the abutment of the dam")
       - construction_type ("Reinforced concrete gravity",
         "Reinforced concrete arch", "earth fill", "other")
       - filter_poles
       - data_source

    Args:
        filename (str): Path to SMC data file.
        any_structure (bool): Accept records from structure-mounted
            (non-free-field) sensors instead of raising.
        accept_flagged (bool): Accept records whose problem flag is set
            instead of raising.
        location (str): Location code to use; when empty, the floor
            number from the integer header is used instead.

    Returns:
        tuple: (stats dictionary, number of comment lines that follow
        the headers).

    Raises:
        GMProcessException: If the year cannot be determined, the record
            has a problem flag (and accept_flagged is False), or the
            record is structure-mounted (and any_structure is False).
    """
    stats = {}
    standard = {}
    format_specific = {}
    coordinates = {}

    # read the ascii header lines
    with open(filename) as f:
        ascheader = [next(f).strip() for x in range(ASCII_HEADER_LINES)]

    standard['process_level'] = PROCESS_LEVELS[VALID_HEADERS[ascheader[0]]]
    logging.debug("process_level: %s" % standard['process_level'])

    # station code is in the third line
    stats['station'] = ''
    if len(ascheader[2]) >= 4:
        stats['station'] = ascheader[2][0:4].strip()
        stats['station'] = stats['station'].strip('\x00')
    logging.debug('station: %s' % stats['station'])

    standard['process_time'] = ''
    standard['station_name'] = ascheader[5][10:40].strip()
    # sometimes the data source has nothing in it,
    # most of the time it seems to have USGS in it;
    # sometimes it's something like JPL/USGS, CDOT/USGS, etc.
    # if it's got USGS in it, let's just say network=US, otherwise "--"
    stats['network'] = 'ZZ'
    if ascheader[7].find('USGS') > -1:
        stats['network'] = 'US'

    try:
        standard['source'] = ascheader[7].split('=')[2].strip()
    except IndexError:
        standard['source'] = 'USGS'
    if standard['source'] == '':
        standard['source'] = 'USGS'
    standard['source_format'] = 'smc'

    # read integer header data
    intheader = np.genfromtxt(filename, dtype=np.int32,
                              max_rows=INTEGER_HEADER_LINES,
                              skip_header=ASCII_HEADER_LINES,
                              delimiter=INT_HEADER_WIDTHS)  # 8 columns per line
    # first line is start time information, and then inst. serial number
    missing_data = intheader[0, 0]
    year = intheader[0, 1]
    # sometimes the year field has a 0 in it. When this happens, we
    # can try to get a timestamp from line 4 of the ascii header.
    if year == 0:
        parts = ascheader[3].split()
        try:
            year = int(parts[0])
        except ValueError as ve:
            fmt = ('Could not find year in SMC file %s. Not present '
                   'in integer header and not parseable from line '
                   '4 of ASCII header. Error: "%s"')
            raise GMProcessException(fmt % (filename, str(ve)))
    jday = intheader[0, 2]
    hour = intheader[0, 3]
    minute = intheader[0, 4]
    if (year != missing_data and
            jday != missing_data and
            hour != missing_data and
            minute != missing_data):

        # Handle second if missing
        second = 0
        if not intheader[0, 5] == missing_data:
            second = intheader[0, 5]

        # Handle microsecond if missing and convert milliseconds to
        # microseconds.
        microsecond = 0
        if not intheader[0, 6] == missing_data:
            # BUGFIX: milliseconds -> microseconds is a multiplication by
            # 1000. The previous division by 1e3 produced a value < 1 for
            # any millisecond count below 1000, which '%i' truncated to 0,
            # silently dropping the sub-second start-time component.
            microsecond = intheader[0, 6] * 1e3

        datestr = '%i %00i %i %i %i %i' % (year, jday, hour, minute,
                                           second, microsecond)
        stats['starttime'] = datetime.strptime(datestr,
                                               '%Y %j %H %M %S %f')
    else:
        logging.warning('No start time provided. '
                        'This must be set manually for network/station: '
                        '%s/%s.' % (stats['network'], stats['station']))
        standard['comments'] = 'Missing start time.'

    standard['sensor_serial_number'] = ''
    if intheader[1, 3] != missing_data:
        standard['sensor_serial_number'] = str(intheader[1, 3])

    # we never get a two character location code so floor location is used
    if location == '':
        location = intheader.flatten()[24]
        if location != missing_data:
            location = str(location)
            if len(location) < 2:
                location = location.zfill(2)
            stats['location'] = location
        else:
            stats['location'] = '--'
    else:
        stats['location'] = location

    # second line is information about number of channels, orientations
    # we care about orientations
    format_specific['vertical_orientation'] = np.nan
    if intheader[1, 4] != missing_data:
        format_specific['vertical_orientation'] = int(intheader[1, 4])

    standard['horizontal_orientation'] = np.nan
    standard['vertical_orientation'] = np.nan
    if intheader[1, 5] != missing_data:
        standard['horizontal_orientation'] = float(intheader[1, 5])

    if intheader[1, 6] == missing_data or intheader[1, 6] not in INSTRUMENTS:
        standard['instrument'] = ''
    else:
        standard['instrument'] = INSTRUMENTS[intheader[1, 6]]

    num_comments = intheader[1, 7]

    # third line contains number of data points
    stats['npts'] = intheader[2, 0]
    problem_flag = intheader[2, 1]
    if problem_flag == 1:
        if not accept_flagged:
            fmt = 'SMC: Record found in file %s has a problem flag!'
            raise GMProcessException(fmt % filename)
        else:
            logging.warning(
                'SMC: Data contains a problem flag for network/station: '
                '%s/%s. See comments.' % (stats['network'],
                                          stats['station']))
    stype = intheader[2, 2]
    if stype == missing_data:
        stype = np.nan
    elif stype not in STRUCTURES:
        # structure type is not defined and will be considered 'other'
        stype = 4
    fmt = 'SMC: Record found in file %s is not a free-field sensor!'
    standard['structure_type'] = STRUCTURES[stype]
    if standard['structure_type'] == 'building' and not any_structure:
        raise Exception(fmt % filename)

    format_specific['building_floor'] = np.nan
    if intheader[3, 0] != missing_data:
        format_specific['building_floor'] = intheader[3, 0]

    format_specific['bridge_number_spans'] = np.nan
    if intheader[3, 1] != missing_data:
        format_specific['bridge_number_spans'] = intheader[3, 1]

    format_specific['bridge_transducer_location'] = BRIDGE_LOCATIONS[0]
    if intheader[3, 2] != missing_data:
        bridge_number = intheader[3, 2]
        format_specific['bridge_transducer_location'] = \
            BRIDGE_LOCATIONS[bridge_number]

    format_specific['dam_transducer_location'] = DAM_LOCATIONS[0]
    if intheader[3, 3] != missing_data:
        dam_number = intheader[3, 3]
        format_specific['dam_transducer_location'] = \
            DAM_LOCATIONS[dam_number]

    # reject records that are mounted on a bridge or dam (i.e. not free
    # field) unless the caller explicitly accepts structures
    c1 = format_specific['bridge_transducer_location'].find(
        'free field') == -1
    c2 = format_specific['dam_transducer_location'].find(
        'free field') == -1
    if (c1 or c2) and not any_structure:
        raise Exception(fmt % filename)

    format_specific['construction_type'] = CONSTRUCTION_TYPES[4]
    if intheader[3, 4] != missing_data:
        format_specific['construction_type'] = \
            CONSTRUCTION_TYPES[intheader[3, 4]]

    # station is repeated here if all numeric
    if not len(stats['station']):
        stats['station'] = '%i' % intheader[3, 5]

    # read float header data
    skip = ASCII_HEADER_LINES + INTEGER_HEADER_LINES
    floatheader = np.genfromtxt(filename,
                                max_rows=FLOAT_HEADER_LINES,
                                skip_header=skip,
                                delimiter=FLOAT_HEADER_WIDTHS)

    # float headers are 10 lines of 5 floats each
    missing_data = floatheader[0, 0]
    stats['sampling_rate'] = floatheader[0, 1]
    if stats['sampling_rate'] >= MAX_ALLOWED_SAMPLE_RATE:
        fmt = 'Sampling rate of %.2g samples/second is nonsensical.'
        raise Exception(fmt % stats['sampling_rate'])
    coordinates['latitude'] = floatheader[2, 0]
    # the documentation for SMC says that sometimes longitudes are
    # positive in the western hemisphere. Since it is very unlikely
    # any of these files exist for the eastern hemisphere, check for
    # positive longitudes and fix them.
    lon = floatheader[2, 1]
    if lon > 0:
        lon = -1 * lon
    coordinates['longitude'] = lon
    coordinates['elevation'] = 0.0
    if floatheader[2, 2] != missing_data:
        coordinates['elevation'] = floatheader[2, 2]
    else:
        logging.warning('Setting elevation to 0.0')

    # figure out the channel code
    if format_specific['vertical_orientation'] in [0, 180]:
        stats['channel'] = get_channel_name(stats['sampling_rate'],
                                            is_acceleration=True,
                                            is_vertical=True,
                                            is_north=False)
    else:
        ho = standard['horizontal_orientation']
        quad1 = ho > 315 and ho <= 360
        quad2 = ho > 0 and ho <= 45
        quad3 = ho > 135 and ho <= 225
        if quad1 or quad2 or quad3:
            stats['channel'] = get_channel_name(stats['sampling_rate'],
                                                is_acceleration=True,
                                                is_vertical=False,
                                                is_north=True)
        else:
            stats['channel'] = get_channel_name(stats['sampling_rate'],
                                                is_acceleration=True,
                                                is_vertical=False,
                                                is_north=False)

    logging.debug('channel: %s' % stats['channel'])
    sensor_frequency = floatheader[4, 1]
    standard['instrument_period'] = 1 / sensor_frequency
    standard['instrument_damping'] = floatheader[4, 2]

    standard['corner_frequency'] = floatheader[3, 4]
    format_specific['filter_poles'] = floatheader[4, 0]
    standard['units'] = 'acc'
    standard['units_type'] = get_units_type(stats['channel'])

    # this field can be used for instrument correction
    # when data is in counts
    standard['instrument_sensitivity'] = np.nan

    # read in the comment lines
    with open(filename) as f:
        skip = ASCII_HEADER_LINES + INTEGER_HEADER_LINES + FLOAT_HEADER_LINES
        _ = [next(f) for x in range(skip)]
        standard['comments'] = [
            next(f).strip().lstrip('|')
            for x in range(num_comments)
        ]

    standard['comments'] = ' '.join(standard['comments'])
    stats['coordinates'] = coordinates
    stats['standard'] = standard
    stats['format_specific'] = format_specific

    head, tail = os.path.split(filename)
    stats['standard']['source_file'] = tail or os.path.basename(head)

    return (stats, num_comments)
def _get_header_info(int_data, flt_data, lines, volume, location=''):
    """Return stats structure from various headers of a USC file.

    Output is a dictionary like this:
     - network (str): 'LA'
     - station (str)
     - channel (str): Determined using COSMOS_ORIENTATIONS
     - location (str): Default is '--'
     - starttime (datetime)
     - duration (float)
     - sampling_rate (float)
     - npts (int)
     - coordinates:
       - latitude (float)
       - longitude (float)
       - elevation (float)
     - standard (Defaults are either np.nan or '')
       - horizontal_orientation (float): Rotation from north (degrees)
       - instrument_period (float): Period of sensor (Hz)
       - instrument_damping (float): Fraction of critical
       - process_time (datetime): Reported date of processing
       - process_level: Either 'V0', 'V1', 'V2', or 'V3'
       - station_name (str): Long form station description
       - sensor_serial_number (str): Reported sensor serial
       - instrument (str): See SENSOR_TYPES
       - comments (str): Processing comments
       - structure_type (str): See BUILDING_TYPES
       - corner_frequency (float): Sensor corner frequency (Hz)
       - units (str): See UNITS
       - source (str): Network source description
       - source_format (str): Always cosmos
     - format_specific
       - fractional_unit (float): Units of digitized acceleration
         in file (fractions of g)

    Args:
        int_data (ndarray): Array of integer data.
        flt_data (ndarray): Array of float data.
        lines (list): List of text headers (str).
        volume (str): Volume type; only 'V1' is populated here.
        location (str): Location code override; default is '--'.

    Returns:
        dictionary: Dictionary of header/metadata information.

    Raises:
        GMProcessException: If the horizontal angle cannot be used to
            distinguish horizontal from vertical channels.
    """
    hdr = {}
    coordinates = {}
    standard = {}
    format_specific = {}
    if volume == 'V1':
        hdr['duration'] = flt_data[2]
        hdr['npts'] = int_data[27]
        # V1 files report duration and sample count; derive the rate.
        hdr['sampling_rate'] = (hdr['npts'] - 1) / hdr['duration']

        # Get required parameter number
        hdr['network'] = 'LA'
        hdr['station'] = str(int_data[8])
        logging.debug('station: %s' % hdr['station'])
        horizontal_angle = int_data[26]
        logging.debug('horizontal: %s' % horizontal_angle)
        if (horizontal_angle in USC_ORIENTATIONS or
                (horizontal_angle >= 0 and horizontal_angle <= 360)):
            if horizontal_angle in USC_ORIENTATIONS:
                channel = USC_ORIENTATIONS[horizontal_angle][1].upper()
                if channel == 'UP' or channel == 'DOWN' or channel == 'VERT':
                    channel = get_channel_name(
                        hdr['sampling_rate'],
                        is_acceleration=True,
                        is_vertical=True,
                        is_north=False)
                    # vertical channels carry no horizontal rotation
                    horizontal_angle = 0.0
            elif (horizontal_angle > 315 or horizontal_angle < 45 or
                    (horizontal_angle > 135 and horizontal_angle < 225)):
                channel = get_channel_name(
                    hdr['sampling_rate'],
                    is_acceleration=True,
                    is_vertical=False,
                    is_north=True)
            else:
                channel = get_channel_name(
                    hdr['sampling_rate'],
                    is_acceleration=True,
                    is_vertical=False,
                    is_north=False)
            horizontal_orientation = horizontal_angle
            hdr['channel'] = channel
            logging.debug('channel: %s' % hdr['channel'])
        else:
            errstr = ('USC: Not enough information to distinguish horizontal from '
                      'vertical channels.')
            raise GMProcessException(errstr)

        if location == '':
            hdr['location'] = '--'
        else:
            hdr['location'] = location

        # start time is split across several integer header fields
        month = str(int_data[21])
        day = str(int_data[22])
        year = str(int_data[23])
        time = str(int_data[24])
        tstr = month + '/' + day + '/' + year + '_' + time
        starttime = datetime.strptime(tstr, '%m/%d/%Y_%H%M')
        hdr['starttime'] = starttime

        # Get coordinates
        lat_deg = int_data[9]
        lat_min = int_data[10]
        lat_sec = int_data[11]
        lon_deg = int_data[12]
        lon_min = int_data[13]
        lon_sec = int_data[14]
        # Check for southern hemisphere, default is northern
        if lines[4].find('STATION USC#') >= 0:
            idx = lines[4].find('STATION USC#') + 12
            if 'S' in lines[4][idx:]:
                lat_sign = -1
            else:
                lat_sign = 1
        else:
            lat_sign = 1
        # Check for western hemisphere, default is western
        if lines[4].find('STATION USC#') >= 0:
            idx = lines[4].find('STATION USC#') + 12
            if 'W' in lines[4][idx:]:
                lon_sign = -1
            else:
                lon_sign = 1
        else:
            lon_sign = -1
        latitude = lat_sign * _dms2dd(lat_deg, lat_min, lat_sec)
        longitude = lon_sign * _dms2dd(lon_deg, lon_min, lon_sec)
        # Since sometimes longitudes are positive in this format for data in
        # the western hemisphere, we "fix" it here. Hopefully no one in the
        # eastern hemisphere uses this format!
        if longitude > 0:
            longitude = -longitude
        coordinates['latitude'] = latitude
        coordinates['longitude'] = longitude
        # FIX: logging.warn() is a deprecated alias of logging.warning();
        # use the supported spelling (consistent with the rest of the file).
        logging.warning('Setting elevation to 0.0')
        coordinates['elevation'] = 0.0

        # Get standard paramaters
        standard['units_type'] = get_units_type(hdr['channel'])
        standard['horizontal_orientation'] = float(horizontal_orientation)
        standard['instrument_period'] = flt_data[0]
        standard['instrument_damping'] = flt_data[1]
        standard['process_time'] = ''
        station_line = lines[5]
        # columns 72-74 of header line 6 give the station-name length
        station_length = int(lines[5][72:74])
        name = station_line[:station_length]
        standard['station_name'] = name
        standard['sensor_serial_number'] = ''
        standard['instrument'] = ''
        standard['comments'] = ''
        standard['units'] = 'acc'
        standard['structure_type'] = ''
        standard['process_level'] = PROCESS_LEVELS['V1']
        standard['corner_frequency'] = np.nan
        standard['source'] = ('Los Angeles Basin Seismic Network, University '
                              'of Southern California')
        standard['source_format'] = 'usc'
        # this field can be used for instrument correction
        # when data is in counts
        standard['instrument_sensitivity'] = np.nan

        # Get format specific
        format_specific['fractional_unit'] = flt_data[4]

    # Set dictionary
    hdr['standard'] = standard
    hdr['coordinates'] = coordinates
    hdr['format_specific'] = format_specific
    return hdr