def test_get_stream_gaps():
    """geomag.TimeseriesUtility_test.test_get_stream_gaps

    confirms that gaps are found in a stream
    """
    # H contains NaN samples (gaps); Z is fully populated.
    h_trace = __create_trace('H', [numpy.nan, 1, 1, numpy.nan, numpy.nan])
    z_trace = __create_trace('Z', [0, 0, 0, 1, 1, 1])
    stream = Stream([h_trace, z_trace])
    for tr in stream:
        # set time of first sample
        tr.stats.starttime = UTCDateTime('2015-01-01T00:00:00Z')
        # set sample rate to 1 second
        tr.stats.delta = 1
    # find gaps
    gaps = TimeseriesUtility.get_stream_gaps(stream)
    # H should have a single-sample gap at the start
    # and a two-sample gap at the end
    assert_equals(len(gaps['H']), 2)
    expected_h_gaps = [
        (UTCDateTime('2015-01-01T00:00:00Z'),
            UTCDateTime('2015-01-01T00:00:00Z')),
        (UTCDateTime('2015-01-01T00:00:03Z'),
            UTCDateTime('2015-01-01T00:00:04Z')),
    ]
    for found, (expected_start, expected_end) in \
            zip(gaps['H'], expected_h_gaps):
        assert_equals(found[0], expected_start)
        assert_equals(found[1], expected_end)
    # no gaps in Z channel
    assert_equals(len(gaps['Z']), 0)
def run_as_update(self, options):
    """Updates data.

    Parameters
    ----------
    options: object
        The command line arguments for this run. Accessed via
        attributes (``options.observatory``, ``options.starttime``,
        ``options.endtime``, ``options.inchannels``,
        ``options.outchannels``), so presumably an
        argparse.Namespace-like object rather than a plain dict —
        TODO confirm against caller.
        NOTE: this method mutates ``options.starttime`` and
        ``options.endtime`` in place.

    Notes
    -----
    Finds gaps in the target data, and if there's new data in the
    input source, calls run with the start/end time of a given gap
    to fill in.

    It checks the start of the target data, and if it's missing,
    and there's new data available, it backs up the
    starttime/endtime, and recursively calls itself, to check the
    previous period, to see if new data is available there as
    well. Calls run for each new period, oldest to newest.
    """
    algorithm = self._algorithm
    # fall back to the algorithm's channel lists when none were
    # provided on the command line
    input_channels = options.inchannels or \
            algorithm.get_input_channels()
    output_channels = options.outchannels or \
            algorithm.get_output_channels()
    # request output to see what has already been generated
    output_timeseries = self._get_output_timeseries(
            observatory=options.observatory,
            starttime=options.starttime,
            endtime=options.endtime,
            channels=output_channels)
    # sample spacing of the output, used to step back exactly one
    # sample when recursing to the previous interval.
    # NOTE(review): raises IndexError if the output fetch returns an
    # empty stream — consider guarding with len(output_timeseries).
    delta = output_timeseries[0].stats.delta
    # find gaps in output, so they can be updated
    output_gaps = TimeseriesUtility.get_merged_gaps(
            TimeseriesUtility.get_stream_gaps(output_timeseries))
    for output_gap in output_gaps:
        # fetch input data covering just this gap
        input_timeseries = self._get_input_timeseries(
                observatory=options.observatory,
                starttime=output_gap[0],
                endtime=output_gap[1],
                channels=input_channels)
        # skip gaps the algorithm cannot fill from this input
        if not algorithm.can_produce_data(
                starttime=output_gap[0],
                endtime=output_gap[1],
                stream=input_timeseries):
            continue
        # check for fillable gap at start
        if output_gap[0] == options.starttime:
            # found fillable gap at start, recurse to previous interval
            # (same-length window ending one sample before starttime)
            # NOTE(review): recursion depth is unbounded here —
            # a long run of fillable history recurses once per interval.
            interval = options.endtime - options.starttime
            starttime = options.starttime - interval - delta
            endtime = options.starttime - delta
            options.starttime = starttime
            options.endtime = endtime
            self.run_as_update(options)
        # fill gap
        options.starttime = output_gap[0]
        options.endtime = output_gap[1]
        self.run(options)
def run_as_update(self, options):
    """Updates data.

    Parameters
    ----------
    options: object
        The command line arguments for this run. Accessed via
        attributes (``options.observatory``, ``options.starttime``,
        ``options.endtime``, ``options.inchannels``,
        ``options.outchannels``), so presumably an
        argparse.Namespace-like object rather than a plain dict —
        TODO confirm against caller.
        NOTE: this method mutates ``options.starttime`` and
        ``options.endtime`` in place.

    Notes
    -----
    Finds gaps in the target data, and if there's new data in the
    input source, calls run with the start/end time of a given gap
    to fill in.

    It checks the start of the target data, and if it's missing,
    and there's new data available, it backs up the
    starttime/endtime, and recursively calls itself, to check the
    previous period, to see if new data is available there as
    well. Calls run for each new period, oldest to newest.
    """
    algorithm = self._algorithm
    # fall back to the algorithm's channel lists when none were
    # provided on the command line
    input_channels = options.inchannels or \
            algorithm.get_input_channels()
    output_channels = options.outchannels or \
            algorithm.get_output_channels()
    # request output to see what has already been generated
    output_timeseries = self._get_output_timeseries(
            observatory=options.observatory,
            starttime=options.starttime,
            endtime=options.endtime,
            channels=output_channels)
    # sample spacing of the output, used to step back exactly one
    # sample when recursing to the previous interval.
    # NOTE(review): raises IndexError if the output fetch returns an
    # empty stream — consider guarding with len(output_timeseries).
    delta = output_timeseries[0].stats.delta
    # find gaps in output, so they can be updated
    output_gaps = TimeseriesUtility.get_merged_gaps(
            TimeseriesUtility.get_stream_gaps(output_timeseries))
    for output_gap in output_gaps:
        # fetch input data covering just this gap
        input_timeseries = self._get_input_timeseries(
                observatory=options.observatory,
                starttime=output_gap[0],
                endtime=output_gap[1],
                channels=input_channels)
        # skip gaps the algorithm cannot fill from this input
        if not algorithm.can_produce_data(starttime=output_gap[0],
                endtime=output_gap[1], stream=input_timeseries):
            continue
        # check for fillable gap at start
        if output_gap[0] == options.starttime:
            # found fillable gap at start, recurse to previous interval
            # (same-length window ending one sample before starttime)
            # NOTE(review): recursion depth is unbounded here —
            # a long run of fillable history recurses once per interval.
            interval = options.endtime - options.starttime
            starttime = options.starttime - interval - delta
            endtime = options.starttime - delta
            options.starttime = starttime
            options.endtime = endtime
            self.run_as_update(options)
        # fill gap
        options.starttime = output_gap[0]
        options.endtime = output_gap[1]
        self.run(options)
def can_produce_data(self, starttime, endtime, stream):
    """Can Produce data

    Parameters
    ----------
    starttime: UTCDateTime
        start time of requested output
    endtime: UTCDateTime
        end time of requested output
    stream: obspy.core.Stream
        The input stream we want to make certain has data for the
        algorithm

    Returns
    -------
    bool
        False if a merged input gap spans the entire requested
        range, True otherwise.
    """
    merged_gaps = TimeseriesUtility.get_merged_gaps(
            TimeseriesUtility.get_stream_gaps(stream))
    # a gap disqualifies the request only when it covers the whole
    # starttime..endtime range (gap[2] is presumably the next sample
    # time after the gap — verify against TimeseriesUtility)
    return not any(
            gap[0] <= starttime <= gap[1] and endtime < gap[2]
            for gap in merged_gaps)
def run_as_update(self, options, update_count=0):
    """Updates data.

    Parameters
    ----------
    options: object
        The command line arguments for this run. Accessed via
        attributes (``options.observatory``, ``options.starttime``,
        ``options.endtime``, ``options.inchannels``,
        ``options.outchannels``, ``options.update_limit``), so
        presumably an argparse.Namespace-like object rather than a
        plain dict — TODO confirm against caller.
        NOTE: this method mutates ``options.starttime`` and
        ``options.endtime`` in place.
    update_count: int
        current recursion depth; incremented on each recursive call
        and compared against ``options.update_limit``.

    Notes
    -----
    Finds gaps in the target data, and if there's new data in the
    input source, calls run with the start/end time of a given gap
    to fill in.

    It checks the start of the target data, and if it's missing,
    and there's new data available, it backs up the
    starttime/endtime, and recursively calls itself, to check the
    previous period, to see if new data is available there as
    well. Calls run for each new period, oldest to newest.
    """
    # If an update_limit is set, make certain we don't step past it.
    # (update_limit == 0 means "no limit")
    if options.update_limit != 0:
        if update_count >= options.update_limit:
            return
    # NOTE(review): Python 2 print-statement syntax; needs
    # print(..., file=sys.stderr) under Python 3.
    print >> sys.stderr, 'checking gaps', \
            options.starttime, options.endtime
    algorithm = self._algorithm
    # fall back to the algorithm's channel lists when none were
    # provided on the command line
    input_channels = options.inchannels or \
            algorithm.get_input_channels()
    output_channels = options.outchannels or \
            algorithm.get_output_channels()
    # request output to see what has already been generated
    output_timeseries = self._get_output_timeseries(
            observatory=options.observatory,
            starttime=options.starttime,
            endtime=options.endtime,
            channels=output_channels)
    if len(output_timeseries) > 0:
        # find gaps in output, so they can be updated
        output_gaps = TimeseriesUtility.get_merged_gaps(
                TimeseriesUtility.get_stream_gaps(output_timeseries))
    else:
        # no output at all: treat the whole requested range as one gap
        output_gaps = [[
            options.starttime,
            options.endtime,
            # next sample time not used
            None
        ]]
    for output_gap in output_gaps:
        # fetch input data covering just this gap
        input_timeseries = self._get_input_timeseries(
                observatory=options.observatory,
                starttime=output_gap[0],
                endtime=output_gap[1],
                channels=input_channels)
        # skip gaps the algorithm cannot fill from this input
        if not algorithm.can_produce_data(starttime=output_gap[0],
                endtime=output_gap[1], stream=input_timeseries):
            continue
        # check for fillable gap at start
        if output_gap[0] == options.starttime:
            # found fillable gap at start, recurse to previous interval
            # (same-length window ending 1 second before starttime;
            # assumes 1-second units for the step back — TODO confirm)
            interval = options.endtime - options.starttime
            starttime = options.starttime - interval
            endtime = options.starttime - 1
            options.starttime = starttime
            options.endtime = endtime
            self.run_as_update(options, update_count + 1)
        # fill gap
        options.starttime = output_gap[0]
        options.endtime = output_gap[1]
        print >> sys.stderr, 'processing', \
                options.starttime, options.endtime
        # reuse the already-fetched input for this gap
        self.run(options, input_timeseries)
def run_as_update(self, options, update_count=0):
    """Updates data.

    Parameters
    ----------
    options: object
        The command line arguments for this run. Accessed via
        attributes (``options.observatory``, ``options.starttime``,
        ``options.endtime``, ``options.inchannels``,
        ``options.outchannels``, ``options.update_limit``), so
        presumably an argparse.Namespace-like object rather than a
        plain dict — TODO confirm against caller.
        NOTE: this method mutates ``options.starttime`` and
        ``options.endtime`` in place.
    update_count: int
        current recursion depth; incremented on each recursive call
        and compared against ``options.update_limit``.

    Notes
    -----
    Finds gaps in the target data, and if there's new data in the
    input source, calls run with the start/end time of a given gap
    to fill in.

    It checks the start of the target data, and if it's missing,
    and there's new data available, it backs up the
    starttime/endtime, and recursively calls itself, to check the
    previous period, to see if new data is available there as
    well. Calls run for each new period, oldest to newest.
    """
    # If an update_limit is set, make certain we don't step past it.
    # (update_limit == 0 means "no limit")
    if options.update_limit != 0:
        if update_count >= options.update_limit:
            return
    # NOTE(review): Python 2 print-statement syntax; needs
    # print(..., file=sys.stderr) under Python 3.
    print >> sys.stderr, 'checking gaps', \
            options.starttime, options.endtime
    algorithm = self._algorithm
    # fall back to the algorithm's channel lists when none were
    # provided on the command line
    input_channels = options.inchannels or \
            algorithm.get_input_channels()
    output_channels = options.outchannels or \
            algorithm.get_output_channels()
    # request output to see what has already been generated
    output_timeseries = self._get_output_timeseries(
            observatory=options.observatory,
            starttime=options.starttime,
            endtime=options.endtime,
            channels=output_channels)
    if len(output_timeseries) > 0:
        # find gaps in output, so they can be updated
        output_gaps = TimeseriesUtility.get_merged_gaps(
                TimeseriesUtility.get_stream_gaps(output_timeseries))
    else:
        # no output at all: treat the whole requested range as one gap
        output_gaps = [[
            options.starttime,
            options.endtime,
            # next sample time not used
            None
        ]]
    for output_gap in output_gaps:
        # fetch input data covering just this gap
        input_timeseries = self._get_input_timeseries(
                observatory=options.observatory,
                starttime=output_gap[0],
                endtime=output_gap[1],
                channels=input_channels)
        # skip gaps the algorithm cannot fill from this input
        if not algorithm.can_produce_data(
                starttime=output_gap[0],
                endtime=output_gap[1],
                stream=input_timeseries):
            continue
        # check for fillable gap at start
        if output_gap[0] == options.starttime:
            # found fillable gap at start, recurse to previous interval
            # (same-length window ending 1 second before starttime;
            # assumes 1-second units for the step back — TODO confirm)
            interval = options.endtime - options.starttime
            starttime = options.starttime - interval
            endtime = options.starttime - 1
            options.starttime = starttime
            options.endtime = endtime
            self.run_as_update(options, update_count + 1)
        # fill gap
        options.starttime = output_gap[0]
        options.endtime = output_gap[1]
        print >> sys.stderr, 'processing', \
                options.starttime, options.endtime
        self.run(options)