def __init__(
    self,
    stats,
    paz=None,
    parser=None,
    skip_on_gaps=False,
    is_rotational_data=False,
    db_bins=(-200, -50, 1.0),
    ppsd_length=3600.0,
    overlap=0.5,
    water_level=600.0,
):
    """
    Set up the PPSD object with all station information that has to stay
    fixed afterwards in order to guarantee consistent spectral estimates.

    Instrument response information can be supplied in two ways:

    * As an `obspy.io.xseed`
      :class:`~obspy.io.xseed.parser.Parser`, e.g. holding the metadata
      of a Dataless SEED file. This is the safer way but it might be a
      bit slower, because the response information is extracted from the
      parser for every processed time segment.
    * As a dictionary with poles and zeros information. Be aware that
      this leads to wrong results if the instrument's response changes
      within the data added to the PPSD. Use with caution!

    If both are given a warning is issued (the parser is announced as the
    one being used); both objects are stored on the instance.

    :note: With `is_rotational_data=True` the applied processing steps
        are changed. Differentiation of data (converting velocity to
        acceleration data) is omitted and a flat instrument response is
        assumed, leaving away response removal and only dividing by
        `paz['sensitivity']` of the provided `paz` dictionary (other
        keys do not have to be present then). For scaling factors that
        are usually multiplied to the data remember to use the inverse
        as `paz['sensitivity']`.

    :type stats: :class:`~obspy.core.trace.Stats`
    :param stats: Stats of the station/instrument to process.
    :type paz: dict, optional
    :param paz: Response information of instrument. If not specified the
        information is supposed to be present as stats.paz.
    :type parser: :class:`obspy.io.xseed.parser.Parser`, optional
    :param parser: Parser instance with response information (e.g. read
        from a Dataless SEED volume).
    :type skip_on_gaps: bool, optional
    :param skip_on_gaps: Whether time segments with gaps should be
        skipped entirely. [McNamara2004]_ merge gappy traces by filling
        with zeros, which results in a clearly identifiable outlier psd
        line in the PPSD visualization. Select `skip_on_gaps=True` for
        not filling gaps with zeros, which might result in some data
        segments shorter than `ppsd_length` not being used in the PPSD.
    :type is_rotational_data: bool, optional
    :param is_rotational_data: If set to True adapt processing of data
        to rotational data. See note for details.
    :type db_bins: tuple of three ints/floats
    :param db_bins: Lower boundary, upper boundary and width of the db
        bins. The bin width might get adjusted to fit a number of
        equally spaced bins in between the given boundaries.
    :type ppsd_length: float, optional
    :param ppsd_length: Length of data segments passed to psd in
        seconds. In the paper by [McNamara2004]_ a value of 3600
        (1 hour) was chosen. Longer segments increase the upper limit of
        analyzed periods but decrease the number of analyzed segments.
    :type overlap: float, optional
    :param overlap: Overlap of segments passed to psd, given as fraction
        (between 0 and 1) of the length of one segment, e.g.
        `ppsd_length=3600` and `overlap=0.5` result in an overlap of
        1800s of the segments.
    :type water_level: float, optional
    :param water_level: Water level used in instrument correction.
    """
    if not (paz is None or parser is None):
        warnings.warn(
            "Both paz and parser specified. Using parser object for "
            "metadata."
        )

    # identification of the channel, copied over from the stats object
    self.id = "%(network)s.%(station)s.%(location)s.%(channel)s" % stats
    for attr_name in ("network", "station", "location", "channel",
                      "sampling_rate"):
        setattr(self, attr_name, getattr(stats, attr_name))
    fs = self.sampling_rate
    self.delta = 1.0 / fs

    # processing options are stored unchanged
    self.is_rotational_data = is_rotational_data
    self.ppsd_length = ppsd_length
    self.overlap = overlap
    self.water_level = water_level

    # number of samples in one segment
    self.len = int(fs * ppsd_length)

    # response information, kept exactly as handed in
    self.paz = paz
    self.parser = parser

    # -1 if gappy segments are to be skipped, 0 otherwise
    self.merge_method = -1 if skip_on_gaps else 0

    # nfft mimics the fft setup of the McNamara&Buland paper (13 segments
    # overlapping by 75%, truncated to the next lower power of 2):
    # number of points of the whole ppsd segment, divided by 4
    # (1 full segment length + 25% * 12 full segment lengths), then
    # reduced to the next smaller power of 2
    self.nfft = prev_pow_2(ppsd_length * fs / 4.0)
    # 75% overlap (we end up with a little more than 13 segments..)
    self.nlap = int(0.75 * self.nfft)

    # bookkeeping containers; `times` is the very same list object as
    # `times_used`
    self.times_used = self.times = []
    self.times_data = []
    self.times_gaps = []
    self.hist_stack = None
    self.__setup_bins()

    # binning of the db scale
    db_lower, db_upper = db_bins[0], db_bins[1]
    num_bins = int((db_upper - db_lower) / db_bins[2])
    self.spec_bins = np.linspace(db_lower, db_upper, num=num_bins + 1,
                                 endpoint=True)
    self.colormap = LinearSegmentedColormap("mcnamara", CDICT, 1024)
def __init__(self, stats, paz=None, dataless=None, skip_on_gaps=False):
    """
    Set up the PPSD object with all station information that has to stay
    fixed afterwards in order to guarantee consistent spectral estimates.

    Instrument response information can be supplied in two ways:

    * As a dataless file. This is the safer way but it might be a bit
      slower, because the response information is extracted from the
      parser for every processed time segment.
    * As a dictionary with poles and zeros information. Be aware that
      this leads to wrong results if the instrument's response changes
      within the data added to the PPSD. Use with caution!

    If both are given a warning is issued. A `Parser` is built from
    `dataless` in any case.

    :type stats: :class:`~obspy.core.trace.Stats`
    :param stats: Stats of the station/instrument to process.
    :type paz: dict (optional)
    :param paz: Response information of instrument. If not specified the
        information is supposed to be present as stats.paz.
    :type dataless: String (optional)
    :param dataless: Dataless file with response information.
    :type skip_on_gaps: Boolean (optional)
    :param skip_on_gaps: Whether time segments with gaps should be
        skipped entirely. McNamara & Buland merge gappy traces by
        filling with zeros, which results in a clearly identifiable
        outlier psd line in the PPSD visualization. Select
        `skip_on_gaps=True` for not filling gaps with zeros, which might
        result in some data segments shorter than 1 hour not being used
        in the PPSD.
    :raises ImportError: If `MATPLOTLIB_VERSION` is None, i.e.
        matplotlib (no official dependency of obspy.signal) is missing.
    """
    # matplotlib is required here although it is no official dependency
    # of obspy.signal
    if MATPLOTLIB_VERSION is None:
        raise ImportError(msg_matplotlib_ImportError)

    if not (paz is None or dataless is None):
        warnings.warn(
            "Both paz and parser specified. Using parser object for "
            "metadata."
        )

    # identification of the channel, copied over from the stats object
    self.id = "%(network)s.%(station)s.%(location)s.%(channel)s" % stats
    self.network, self.station, self.location, self.channel = (
        stats.network,
        stats.station,
        stats.location,
        stats.channel,
    )
    self.sampling_rate = stats.sampling_rate
    fs = self.sampling_rate
    self.delta = 1.0 / fs

    # number of samples in a one hour piece
    self.len = int(fs * PPSD_LENGTH)

    # response information: the raw arguments plus a parser built from
    # the dataless argument
    self.paz = paz
    self.dataless = dataless
    self.parser = Parser(dataless)

    # -1 if gappy segments are to be skipped, 0 otherwise
    self.merge_method = -1 if skip_on_gaps else 0

    # nfft mimics the fft setup of the McNamara&Buland paper (13 segments
    # overlapping by 75%, truncated to the next lower power of 2):
    # number of points of the whole ppsd segment (currently 1 hour),
    # divided by 4 (1 full segment length + 25% * 12 full segment
    # lengths), then reduced to the next smaller power of 2
    self.nfft = prev_pow_2(PPSD_LENGTH * fs / 4.0)
    # 75% overlap (we end up with a little more than 13 segments..)
    self.nlap = int(0.75 * self.nfft)

    # bookkeeping containers; `times` is the very same list object as
    # `times_used`
    self.times_used = self.times = []
    self.times_data = []
    self.times_gaps = []
    self.hist_stack = None
    self.psd = []
    self.spikes = []
    self.__setup_bins()
def psd_values(psd_periods, tr, metadata, special_handling=None,
               period_smoothing_width_octaves=1.0,
               period_step_octaves=0.125, method='old'):
    """
    Calculates the power spectral density (psd) of the given trace `tr`,
    and returns the values in dB at the given `psd_periods`.

    Note: all optional parameters should be left as they are, as the given
    parameters were those used for training.

    For any further information, see
    :class:`~obspy.signal.spectral_estimation.PPSD` and
    :class:`~obspy.signal.spectral_estimation.PPSD.__process`

    Side effect: if `tr.data` is a masked array, its masked samples are
    set to 0.0 in place.

    :param psd_periods: numeric list/array of periods (in second)
    :param tr: obspy Trace
    :param metadata: Response information of instrument. It must be
        a :class:`~obspy.core.inventory.inventory.Inventory` (e.g. read from a
        StationXML file using
        :func:`~obspy.core.inventory.inventory.read_inventory` or fetched
        from a :mod:`FDSN <obspy.clients.fdsn>` webservice). With
        `special_handling="ringlaser"` it is instead only indexed with the
        key `'sensitivity'`.
    :param special_handling: `"ringlaser"` only divides the spectrum by the
        squared sensitivity (no response removal, no differentiation);
        `"hydrophone"` removes the instrument response but does not
        differentiate; any other value removes the response and
        differentiates (multiplication by omega squared).
    :param period_smoothing_width_octaves: width, in octaves, of the period
        range the spectrum is averaged over for each smoothed value.
    :param period_step_octaves: passed to the period binning when
        `method='old'`; not used otherwise.
    :param method: `'old'` averages the spectrum on the bins given by
        `_setup_yield_period_binning` and interpolates the result at
        `psd_periods`; any other value averages the spectrum directly on a
        bin centered (in the geometric sense) on each element of
        `psd_periods`.
    :return: numpy array of psd values in dB, one per element of
        `psd_periods`. Values are NaN where no estimate is available: with
        `method='old'` for periods outside the range of the bin centers,
        otherwise for periods whose smoothing bin holds no spectral sample.
    :raises ValueError: if the instrument response cannot be obtained from
        `metadata` (the original exception is attached as cause).
    """
    # if trace has a masked array we fill in zeros
    try:
        tr.data[tr.data.mask] = 0.0
    # if it is no masked array, we get an AttributeError
    # and have nothing to do
    except AttributeError:
        pass

    # merging some PPSD.__init__ stuff here:
    ppsd_length = tr.stats.endtime - tr.stats.starttime  # float, seconds
    sampling_rate = tr.stats.sampling_rate

    # calculate derived attributes
    # nfft is determined mimicking the fft setup in McNamara&Buland
    # paper:
    # (they take 13 segments overlapping 75% and truncate to next lower
    #  power of 2)
    #  - take number of points of whole ppsd segment (default 1 hour)
    nfft = ppsd_length * sampling_rate
    #  - make 13 single segments overlapping by 75%
    #    (1 full segment length + 25% * 12 full segment lengths)
    nfft = nfft / 4.0
    #  - go to next smaller power of 2 for nfft
    nfft = prev_pow_2(nfft)
    #  - use 75% overlap
    #    (we end up with a little more than 13 segments..)
    nlap = int(0.75 * nfft)

    # calculate the spectrum. `mlab` (presumably matplotlib's mlab module,
    # as in ObsPy's PPSD) is a plotting-library helper, which seems a weird
    # choice here, but the function basically computes an fft and then its
    # power spectrum.
    spec, _freq = mlab.psd(tr.data, nfft, sampling_rate,
                           detrend=mlab.detrend_linear, window=fft_taper,
                           noverlap=nlap, sides='onesided',
                           scale_by_freq=True)

    # leave out first entry (offset)
    spec = spec[1:]
    freq = _freq[1:]

    # working with the periods not frequencies later so reverse spectrum
    spec = spec[::-1]

    # Here we remove the response using the same conventions
    # since the power is squared we want to square the sensitivity
    # we can also convert to acceleration if we have non-rotational data
    if special_handling == "ringlaser":
        # in case of rotational data just remove sensitivity
        spec /= metadata['sensitivity']**2
    # special_handling "hydrophone" does instrument correction same as
    # "normal" data
    else:
        # determine instrument response from metadata
        try:
            resp = _get_response(tr, metadata, nfft)
        except Exception as e:
            msg = ("Error getting response from provided metadata:\n"
                   "%s: %s\n"
                   "Skipping time segment(s).")
            msg = msg % (e.__class__.__name__, str(e))
            # chain explicitly so that the original failure stays attached
            raise ValueError(msg) from e

        resp = resp[1:]
        resp = resp[::-1]
        # Now get the amplitude response (squared)
        respamp = np.absolute(resp * np.conjugate(resp))
        # Make omega with the same conventions as spec
        w = 2.0 * math.pi * freq
        w = w[::-1]
        # Here we do the response removal
        # Do not differentiate when `special_handling="hydrophone"`
        if special_handling == "hydrophone":
            spec = spec / respamp
        else:
            spec = (w**2) * spec / respamp

    # avoid calculating log of zero
    idx = spec < dtiny
    spec[idx] = dtiny

    # go to dB
    spec = np.log10(spec)
    spec *= 10

    # setup variables for the final smoothed spectral values:
    smoothed_psd = []
    # periods matching the (reversed) spectrum, i.e. in ascending order
    _psd_periods = 1.0 / freq[::-1]
    psd_periods = np.asarray(psd_periods)

    if method == 'old':
        # smooth the spectrum on the period bins yielded by
        # `_setup_yield_period_binning`: every bin contributes the mean of
        # `spec` over the periods in [left, right], associated with the
        # bin's center period. The smoothed values are then interpolated
        # at the requested `psd_periods`
        period_bin_centers = []
        period_limits = (_psd_periods[0], _psd_periods[-1])
        # calculate smoothed periods
        for periods_bins in \
            _setup_yield_period_binning(psd_periods,
                                        period_smoothing_width_octaves,
                                        period_step_octaves,
                                        period_limits):
            period_bin_left, period_bin_center, period_bin_right = \
                periods_bins
            _spec_slice = spec[(period_bin_left <= _psd_periods) &
                               (_psd_periods <= period_bin_right)]
            smoothed_psd.append(_spec_slice.mean())
            period_bin_centers.append(period_bin_center)
        # interpolate. Use log10 as it was used for training (from tests,
        # linear interpolation does not change much anyway)
        val = np.interp(np.log10(psd_periods),
                        np.log10(period_bin_centers), smoothed_psd)
        # np.interp clamps outside the given range: mark those as missing
        val[psd_periods < period_bin_centers[0]] = np.nan
        val[psd_periods > period_bin_centers[-1]] = np.nan
    else:
        # the width of frequencies we average over for every bin is
        # controlled by period_smoothing_width_octaves (default one full
        # octave)
        period_smoothing_width_factor = \
            2 ** period_smoothing_width_octaves
        period_smoothing_width_factor_sqrt = \
            (period_smoothing_width_factor ** 0.5)
        for psd_period in psd_periods:
            # calculate left/right edge of the smoothing bin such that
            # psd_period is its (geometric) center
            period_bin_left = psd_period / \
                period_smoothing_width_factor_sqrt
            period_bin_right = period_bin_left * \
                period_smoothing_width_factor
            # _psd_periods is ascending, so the periods within
            # [period_bin_left, period_bin_right] form a contiguous slice
            id1 = np.searchsorted(_psd_periods, period_bin_left,
                                  side='left')
            id2 = np.searchsorted(_psd_periods, period_bin_right,
                                  side='right')
            _spec_slice = spec[id1:id2]
            # an empty slice has no mean: store NaN explicitly instead of
            # letting numpy emit a "Mean of empty slice" RuntimeWarning
            smoothed_psd.append(_spec_slice.mean() if len(_spec_slice)
                                else np.nan)

        val = np.array(smoothed_psd)

    return val
def __init__(self, stats, metadata, skip_on_gaps=False,
             is_rotational_data=False, db_bins=(-200, -50, 1.),
             ppsd_length=3600., overlap=0.5, **kwargs):
    """
    Set up the PPSD object with all station information that has to stay
    fixed afterwards in order to guarantee consistent spectral estimates.

    Instrument response information can be supplied in several ways
    through the `metadata` argument:

    * As an :class:`~obspy.core.inventory.inventory.Inventory` object
      (e.g. read from a StationXML file using
      :func:`~obspy.core.inventory.inventory.read_inventory` or fetched
      from a :mod:`FDSN <obspy.clients.fdsn>` webservice).
    * As an :class:`obspy.io.xseed Parser
      <obspy.io.xseed.parser.Parser>` (e.g. containing metadata from a
      Dataless SEED file).
    * As the filename/path to a local RESP file.
    * As a dictionary with poles and zeros information. Be aware that
      this leads to wrong results if the instrument's response changes
      over the timespans that are added to the PPSD. Use with caution!

    :note: With `is_rotational_data=True` the applied processing steps
        are changed (and it is assumed that a dictionary is provided as
        `metadata`). Differentiation of data (converting velocity to
        acceleration data) is omitted and a flat instrument response is
        assumed, leaving away response removal and only dividing by
        `metadata['sensitivity']` of the provided `metadata` dictionary
        (other keys do not have to be present then). For scaling factors
        that are usually multiplied to the data remember to use the
        inverse as `metadata['sensitivity']`.

    :type stats: :class:`~obspy.core.trace.Stats`
    :param stats: Stats of the station/instrument to process.
    :type metadata: :class:`~obspy.core.inventory.inventory.Inventory` or
        :class:`~obspy.io.xseed Parser` or str or dict
    :param metadata: Response information of instrument. See above notes
        for details.
    :type skip_on_gaps: bool, optional
    :param skip_on_gaps: Whether time segments with gaps should be
        skipped entirely. [McNamara2004]_ merge gappy traces by filling
        with zeros, which results in a clearly identifiable outlier psd
        line in the PPSD visualization. Select `skip_on_gaps=True` for
        not filling gaps with zeros, which might result in some data
        segments shorter than `ppsd_length` not being used in the PPSD.
    :type is_rotational_data: bool, optional
    :param is_rotational_data: If set to True adapt processing of data
        to rotational data. See note for details.
    :type db_bins: tuple of three ints/floats
    :param db_bins: Lower boundary, upper boundary and width of the db
        bins. The bin width might get adjusted to fit a number of
        equally spaced bins in between the given boundaries.
    :type ppsd_length: float, optional
    :param ppsd_length: Length of data segments passed to psd in
        seconds. In the paper by [McNamara2004]_ a value of 3600
        (1 hour) was chosen. Longer segments increase the upper limit of
        analyzed periods but decrease the number of analyzed segments.
    :type overlap: float, optional
    :param overlap: Overlap of segments passed to psd, given as fraction
        (between 0 and 1) of the length of one segment, e.g.
        `ppsd_length=3600` and `overlap=0.5` result in an overlap of
        1800s of the segments.
    :param kwargs: Additional keyword arguments are accepted but not
        used by this method.
    """
    # identification of the channel, copied over from the stats object
    self.id = "%(network)s.%(station)s.%(location)s.%(channel)s" % stats
    for attr_name in ("network", "station", "location", "channel",
                      "sampling_rate"):
        setattr(self, attr_name, getattr(stats, attr_name))
    fs = self.sampling_rate
    self.delta = 1.0 / fs

    # processing options are stored unchanged
    self.is_rotational_data = is_rotational_data
    self.ppsd_length = ppsd_length
    self.overlap = overlap

    # number of samples in one segment
    self.len = int(fs * ppsd_length)
    self.metadata = metadata

    # -1 if gappy segments are to be skipped, 0 otherwise
    self.merge_method = -1 if skip_on_gaps else 0

    # nfft mimics the fft setup of the McNamara&Buland paper (13 segments
    # overlapping by 75%, truncated to the next lower power of 2):
    # number of points of the whole ppsd segment, divided by 4
    # (1 full segment length + 25% * 12 full segment lengths), then
    # reduced to the next smaller power of 2
    self.nfft = prev_pow_2(ppsd_length * fs / 4.0)
    # 75% overlap (we end up with a little more than 13 segments..)
    self.nlap = int(0.75 * self.nfft)

    # bookkeeping containers, each one a separate list
    self._times_used = []
    self._times_data = []
    self._times_gaps = []
    self.hist_stack = None
    self.__setup_bins()

    # binning of the db scale
    db_lower, db_upper = db_bins[0], db_bins[1]
    num_bins = int((db_upper - db_lower) / db_bins[2])
    self.spec_bins = np.linspace(db_lower, db_upper, num=num_bins + 1,
                                 endpoint=True)
def _old_psd_values(psd_periods, tr, metadata, special_handling=None,
                    period_smoothing_width_octaves=1.0,
                    period_step_octaves=0.125, smooth_on_all_periods=False):
    """
    Old implementation of psd_values. See :func:`sdaas.core.psd_values`

    Computes the power spectral density of the trace `tr` and returns its
    values in dB at the given `psd_periods`.

    Side effects: `tr.data` is replaced by a float64 copy and, if it is a
    masked array, its masked samples are set to 0.0.

    :param psd_periods: numeric list/array of periods
    :param tr: trace whose `data` and `stats` (`starttime`, `endtime`,
        `sampling_rate`) are used
    :param metadata: response information, passed to `_get_response`. With
        `special_handling="ringlaser"` it is instead only indexed with the
        key `'sensitivity'`.
    :param special_handling: `"ringlaser"` only divides the spectrum by the
        squared sensitivity (no response removal, no differentiation);
        `"hydrophone"` removes the instrument response but does not
        differentiate; any other value removes the response and
        differentiates (multiplication by omega squared).
    :param period_smoothing_width_octaves: passed to the period binning
        helpers; controls the width of the period range the spectrum is
        averaged over for each smoothed value.
    :param period_step_octaves: passed to `_setup_yield_period_binning`
        when `smooth_on_all_periods` is True; not used otherwise.
    :param smooth_on_all_periods: if True, the spectrum is averaged on the
        bins given by `_setup_yield_period_binning` and the result is
        interpolated at `psd_periods`; if False, the spectrum is averaged
        directly on the bins given by `_yield_period_binning`.
    :return: numpy array of psd values in dB. Values are NaN where no
        estimate is available: when interpolating, for periods outside the
        range of the bin centers; otherwise for bins holding no spectral
        sample.
    :raises ValueError: if the instrument response cannot be obtained from
        `metadata` (the original exception is attached as cause).
    """
    import math

    from matplotlib import mlab
    from obspy.signal.spectral_estimation import dtiny, fft_taper
    from obspy.signal.util import prev_pow_2
    from sdaas.core.psd import _get_response, _setup_yield_period_binning, \
        _yield_period_binning

    # Convert to float, this is only necessary if in-place operations follow,
    # which is the case e.g. for the fft_taper function (see below)
    # (tested with mlab 3.2.2 and obspy 1.1.1)
    tr.data = tr.data.astype(np.float64)
    # if trace has a masked array we fill in zeros
    try:
        tr.data[tr.data.mask] = 0.0
    # if it is no masked array, we get an AttributeError
    # and have nothing to do
    except AttributeError:
        pass

    # merging some PPSD.__init__ stuff here:
    ppsd_length = tr.stats.endtime - tr.stats.starttime  # float, seconds
    sampling_rate = tr.stats.sampling_rate

    # calculate derived attributes
    # nfft is determined mimicking the fft setup in McNamara&Buland
    # paper:
    # (they take 13 segments overlapping 75% and truncate to next lower
    #  power of 2)
    #  - take number of points of whole ppsd segment (default 1 hour)
    nfft = ppsd_length * sampling_rate
    #  - make 13 single segments overlapping by 75%
    #    (1 full segment length + 25% * 12 full segment lengths)
    nfft = nfft / 4.0
    #  - go to next smaller power of 2 for nfft
    nfft = prev_pow_2(nfft)
    #  - use 75% overlap
    #    (we end up with a little more than 13 segments..)
    nlap = int(0.75 * nfft)

    # calculate the spectrum. Using matplotlib's mlab for this seems weird
    # (as the PPSD has a strong focus on outputting plots, it makes sense,
    # here not so much) but the function basically computes an fft and then
    # its power spectrum.
    spec, _freq = mlab.psd(tr.data, nfft, sampling_rate,
                           detrend=mlab.detrend_linear, window=fft_taper,
                           noverlap=nlap, sides='onesided',
                           scale_by_freq=True)

    # leave out first entry (offset)
    spec = spec[1:]
    freq = _freq[1:]

    # working with the periods not frequencies later so reverse spectrum
    spec = spec[::-1]

    # Here we remove the response using the same conventions
    # since the power is squared we want to square the sensitivity
    # we can also convert to acceleration if we have non-rotational data
    if special_handling == "ringlaser":
        # in case of rotational data just remove sensitivity
        spec /= metadata['sensitivity'] ** 2
    # special_handling "hydrophone" does instrument correction same as
    # "normal" data
    else:
        # determine instrument response from metadata
        try:
            resp = _get_response(tr, metadata, nfft)
        except Exception as e:
            msg = ("Error getting response from provided metadata:\n"
                   "%s: %s\n"
                   "Skipping time segment(s).")
            msg = msg % (e.__class__.__name__, str(e))
            # chain explicitly so that the original failure stays attached
            raise ValueError(msg) from e

        resp = resp[1:]
        resp = resp[::-1]
        # Now get the amplitude response (squared)
        respamp = np.absolute(resp * np.conjugate(resp))
        # Make omega with the same conventions as spec
        w = 2.0 * math.pi * freq
        w = w[::-1]
        # Here we do the response removal
        # Do not differentiate when `special_handling="hydrophone"`
        if special_handling == "hydrophone":
            spec = spec / respamp
        else:
            spec = (w ** 2) * spec / respamp

    # avoid calculating log of zero
    idx = spec < dtiny
    spec[idx] = dtiny

    # go to dB
    spec = np.log10(spec)
    spec *= 10

    # setup variables for the final smoothed spectral values:
    smoothed_psd = []
    # periods matching the (reversed) spectrum, i.e. in ascending order
    _psd_periods = 1.0 / freq[::-1]
    psd_periods = np.asarray(psd_periods)

    if smooth_on_all_periods:
        # smooth the spectrum on the period bins yielded by
        # `_setup_yield_period_binning`: every bin contributes the mean of
        # `spec` over the periods in [left, right], associated with the
        # bin's center period. The smoothed values are then interpolated
        # at the requested `psd_periods`
        period_bin_centers = []
        period_limits = (_psd_periods[0], _psd_periods[-1])
        # calculate smoothed periods
        for periods_bins in \
            _setup_yield_period_binning(psd_periods,
                                        period_smoothing_width_octaves,
                                        period_step_octaves,
                                        period_limits):
            period_bin_left, period_bin_center, period_bin_right = \
                periods_bins
            _spec_slice = spec[(period_bin_left <= _psd_periods) &
                               (_psd_periods <= period_bin_right)]
            smoothed_psd.append(_spec_slice.mean())
            period_bin_centers.append(period_bin_center)
        # interpolate. Use log10 as it was used for training (from tests,
        # linear interpolation does not change much anyway)
        val = np.interp(
            np.log10(psd_periods),
            np.log10(period_bin_centers),
            smoothed_psd
        )
        # np.interp clamps outside the given range: mark those as missing
        val[psd_periods < period_bin_centers[0]] = np.nan
        val[psd_periods > period_bin_centers[-1]] = np.nan
    else:
        # the width of frequencies we average over for every bin is
        # controlled by period_smoothing_width_octaves (default one full
        # octave)
        for period_bin_left, period_bin_right in \
                _yield_period_binning(psd_periods,
                                      period_smoothing_width_octaves):
            _spec_slice = spec[(period_bin_left <= _psd_periods) &
                               (_psd_periods <= period_bin_right)]
            # an empty bin has no mean: store NaN explicitly
            smoothed_psd.append(_spec_slice.mean() if len(_spec_slice)
                                else np.nan)

        val = np.array(smoothed_psd)

    return val