def run_as_update(self, options):
        """Fill gaps in previously generated output.

        Parameters
        ----------
        options: object
            The parsed command line arguments, read as attributes
            (observatory, starttime, endtime, inchannels, outchannels).
            Could in theory contain other options passed in by the
            controller. Its starttime and endtime are overwritten while
            gaps are processed.

        Notes
        -----
        Requests the existing output for the configured interval, merges
            the gaps found in it, and for every gap the algorithm can
            produce data for, calls run with that gap's start/end time.
        When a fillable gap begins exactly at the requested starttime,
            the interval immediately before it is checked first by calling
            this method recursively, so run is called for each period
            oldest to newest.
        """
        algorithm = self._algorithm
        in_channels = options.inchannels or algorithm.get_input_channels()
        out_channels = options.outchannels or algorithm.get_output_channels()
        # look at what has already been generated for this interval
        existing = self._get_output_timeseries(
            observatory=options.observatory,
            starttime=options.starttime,
            endtime=options.endtime,
            channels=out_channels)
        # NOTE(review): indexes the first output trace, so this presumably
        # fails when no output traces are returned -- confirm with callers
        sample_delta = existing[0].stats.delta
        # gaps in the output are the periods that may need updating
        channel_gaps = TimeseriesUtility.get_stream_gaps(existing)
        for gap in TimeseriesUtility.get_merged_gaps(channel_gaps):
            gap_start, gap_end = gap[0], gap[1]
            available = self._get_input_timeseries(
                observatory=options.observatory,
                starttime=gap_start,
                endtime=gap_end,
                channels=in_channels)
            fillable = algorithm.can_produce_data(
                starttime=gap_start,
                endtime=gap_end,
                stream=available)
            if not fillable:
                continue
            if gap_start == options.starttime:
                # the gap opens the requested interval: step back one
                # interval (offset by one sample) and update that first
                span = options.endtime - options.starttime
                options.starttime, options.endtime = (
                    options.starttime - span - sample_delta,
                    options.starttime - sample_delta)
                self.run_as_update(options)
            # point options at the gap itself and fill it
            options.starttime, options.endtime = gap_start, gap_end
            self.run(options)
Example #2
0
    def run_as_update(self, options):
        """Update output by filling the gaps that can now be produced.

        Parameters
        ----------
        options: object
            The parsed command line arguments, read as attributes
            (observatory, starttime, endtime, inchannels, outchannels).
            Could in theory contain other options passed in by the
            controller. Its starttime and endtime are overwritten while
            gaps are processed.

        Notes
        -----
        The existing output for the requested interval is fetched and its
            merged gaps are visited in order. A gap is skipped when the
            algorithm reports it cannot produce data from the input
            fetched for that gap; otherwise run is called with the gap's
            start/end time.
        If a fillable gap starts at the requested starttime, this method
            first calls itself for the preceding interval, so older
            periods are processed before newer ones.
        """
        algo = self._algorithm
        channels_in = options.inchannels or algo.get_input_channels()
        channels_out = options.outchannels or algo.get_output_channels()
        # fetch whatever output already exists for the interval
        current_output = self._get_output_timeseries(
            observatory=options.observatory,
            starttime=options.starttime,
            endtime=options.endtime,
            channels=channels_out)
        # NOTE(review): presumably requires at least one output trace,
        # since the first one is indexed here -- confirm with callers
        step = current_output[0].stats.delta
        gaps_by_channel = TimeseriesUtility.get_stream_gaps(current_output)
        merged_gaps = TimeseriesUtility.get_merged_gaps(gaps_by_channel)
        for merged_gap in merged_gaps:
            first, last = merged_gap[0], merged_gap[1]
            gap_input = self._get_input_timeseries(
                observatory=options.observatory,
                starttime=first,
                endtime=last,
                channels=channels_in)
            if not algo.can_produce_data(
                    starttime=first, endtime=last, stream=gap_input):
                # input cannot fill this gap yet
                continue
            if first == options.starttime:
                # gap sits at the very start: look at the previous
                # interval (shifted back by one sample) before this one
                length = options.endtime - options.starttime
                previous_start = options.starttime - length - step
                previous_end = options.starttime - step
                options.starttime = previous_start
                options.endtime = previous_end
                self.run_as_update(options)
            # narrow the options to the gap and generate its data
            options.starttime = first
            options.endtime = last
            self.run(options)
Example #3
0
def test_get_stream_gaps():
    """geomag.TimeseriesUtility_test.test_get_stream_gaps

    confirms that gaps are reported per channel for a stream
    """
    h_trace = __create_trace('H', [numpy.nan, 1, 1, numpy.nan, numpy.nan])
    z_trace = __create_trace('Z', [0, 0, 0, 1, 1, 1])
    stream = Stream([h_trace, z_trace])
    for trace in stream:
        # first sample at midnight, one sample per second
        trace.stats.starttime = UTCDateTime('2015-01-01T00:00:00Z')
        trace.stats.delta = 1
    gaps = TimeseriesUtility.get_stream_gaps(stream)
    # H is missing its first sample and its last two samples
    h_gaps = gaps['H']
    assert_equals(len(h_gaps), 2)
    expected_h = [
        ('2015-01-01T00:00:00Z', '2015-01-01T00:00:00Z'),
        ('2015-01-01T00:00:03Z', '2015-01-01T00:00:04Z'),
    ]
    for gap, (gap_start, gap_end) in zip(h_gaps, expected_h):
        assert_equals(gap[0], UTCDateTime(gap_start))
        assert_equals(gap[1], UTCDateTime(gap_end))
    # Z has a value for every sample, so no gaps are expected
    assert_equals(len(gaps['Z']), 0)
Example #4
0
    def can_produce_data(self, starttime, endtime, stream):
        """Can produce data

        Parameters
        ----------
        starttime: UTCDateTime
            start time of requested output
        endtime : UTCDateTime
            end time of requested output
        stream: obspy.core.Stream
            The input stream we want to make certain has data for the algorithm

        Returns
        -------
        bool
            False when a merged input gap contains starttime and endtime
            falls before that gap's third element; True otherwise.
        """
        channel_gaps = TimeseriesUtility.get_stream_gaps(stream)
        merged_gaps = TimeseriesUtility.get_merged_gaps(channel_gaps)
        # a gap blocks production when it includes the entire range
        blocked = any(
            starttime >= gap[0] and
            starttime <= gap[1] and
            endtime < gap[2]
            for gap in merged_gaps)
        return not blocked
Example #5
0
def test_get_trace_gaps():
    """geomag.TimeseriesUtility_test.test_get_trace_gaps

    confirm that the run of missing samples in a trace is reported
    """
    samples = [1, 1, numpy.nan, numpy.nan, 0, 1]
    trace = __create_trace('H', samples)
    # first sample at midnight, one sample every 60 seconds
    trace.stats.starttime = UTCDateTime('2015-01-01T00:00:00Z')
    trace.stats.delta = 60
    gaps = TimeseriesUtility.get_trace_gaps(trace)
    # the two consecutive NaN samples form a single gap
    assert_equals(len(gaps), 1)
    only_gap = gaps[0]
    expected = ('2015-01-01T00:02:00Z', '2015-01-01T00:03:00Z')
    for index, timestamp in enumerate(expected):
        assert_equals(only_gap[index], UTCDateTime(timestamp))
Example #6
0
def test_get_merged_gaps():
    """geomag.TimeseriesUtility_test.test_get_merged_gaps

    confirm that gaps from different channels are combined
    """
    def make_gap(first, last, third):
        # each gap is a three element list of times
        return [UTCDateTime(first), UTCDateTime(last), UTCDateTime(third)]

    # H gap lasts from :01 to :03 and begins after the first Z gap
    h_gaps = [
        make_gap('2015-01-01T00:00:01Z',
                 '2015-01-01T00:00:03Z',
                 '2015-01-01T00:00:04Z'),
    ]
    # Z has a one sample gap before the H gap, and a later separate gap
    z_gaps = [
        make_gap('2015-01-01T00:00:00Z',
                 '2015-01-01T00:00:00Z',
                 '2015-01-01T00:00:01Z'),
        make_gap('2015-01-01T00:00:05Z',
                 '2015-01-01T00:00:07Z',
                 '2015-01-01T00:00:08Z'),
    ]
    merged = TimeseriesUtility.get_merged_gaps({'H': h_gaps, 'Z': z_gaps})
    assert_equals(len(merged), 2)
    expected = [
        # first merged gap spans the first Z gap and the H gap
        ('2015-01-01T00:00:00Z', '2015-01-01T00:00:03Z'),
        # second merged gap is the later Z gap on its own
        ('2015-01-01T00:00:05Z', '2015-01-01T00:00:07Z'),
    ]
    for gap, (gap_start, gap_end) in zip(merged, expected):
        assert_equals(gap[0], UTCDateTime(gap_start))
        assert_equals(gap[1], UTCDateTime(gap_end))
Example #7
0
    def run_as_update(self, options, update_count=0):
        """Fill gaps in previously generated output.

        Parameters
        ----------
        options: object
            The parsed command line arguments, read as attributes
            (observatory, starttime, endtime, inchannels, outchannels,
            update_limit). Could in theory contain other options passed
            in by the controller. Its starttime and endtime are
            overwritten while gaps are processed.
        update_count: int
            Number of recursive steps already taken; compared against
            options.update_limit.

        Notes
        -----
        Requests the existing output for the configured interval and
            finds its merged gaps; when no output traces are returned the
            whole interval is treated as one gap. For every gap the
            algorithm can produce data for, run is called with that gap's
            start/end time and the input fetched for it.
        When a fillable gap begins exactly at the requested starttime,
            the preceding interval is checked first by calling this
            method recursively, so run is called for each period oldest
            to newest.
        """
        # an update_limit of 0 disables the check; otherwise stop once
        # that many steps have been taken
        limit = options.update_limit
        if limit != 0 and update_count >= limit:
            return
        print >> sys.stderr, 'checking gaps', \
                options.starttime, options.endtime
        algorithm = self._algorithm
        in_channels = options.inchannels or algorithm.get_input_channels()
        out_channels = options.outchannels or algorithm.get_output_channels()
        # look at what has already been generated for this interval
        existing = self._get_output_timeseries(
            observatory=options.observatory,
            starttime=options.starttime,
            endtime=options.endtime,
            channels=out_channels)
        if len(existing) == 0:
            # nothing generated yet: the whole interval is a single gap
            # (third element, the next sample time, is not used)
            gaps = [[options.starttime, options.endtime, None]]
        else:
            gaps = TimeseriesUtility.get_merged_gaps(
                TimeseriesUtility.get_stream_gaps(existing))
        for gap in gaps:
            gap_start, gap_end = gap[0], gap[1]
            available = self._get_input_timeseries(
                observatory=options.observatory,
                starttime=gap_start,
                endtime=gap_end,
                channels=in_channels)
            fillable = algorithm.can_produce_data(
                starttime=gap_start,
                endtime=gap_end,
                stream=available)
            if not fillable:
                continue
            if gap_start == options.starttime:
                # the gap opens the requested interval: step back to the
                # preceding interval and update that first
                span = options.endtime - options.starttime
                options.starttime, options.endtime = (
                    options.starttime - span,
                    options.starttime - 1)
                self.run_as_update(options, update_count + 1)
            # point options at the gap itself and fill it
            options.starttime, options.endtime = gap_start, gap_end
            print >> sys.stderr, 'processing', \
                    options.starttime, options.endtime
            self.run(options, available)
Example #8
0
    def put_timeseries(self,
                       timeseries,
                       starttime=None,
                       endtime=None,
                       channels=None,
                       type=None,
                       interval=None):
        """Store timeseries data.

        The data is split into one slice per url interval. Each slice is
        written to the file for its url; when that file already exists,
        its contents are parsed and combined with the new slice using
        TimeseriesUtility.merge_streams before the file is rewritten.
        Url intervals that contain no samples are skipped.

        Parameters
        ----------
        timeseries : obspy.core.Stream
            stream containing traces to store.
        starttime : UTCDateTime
            time of first sample in timeseries to store.
            uses first sample if unspecified.
        endtime : UTCDateTime
            time of last sample in timeseries to store.
            uses last sample if unspecified.
        channels : array_like
            list of channels to store, optional.
            uses default if unspecified.
        type : {'definitive', 'provisional', 'quasi-definitive', 'variation'}
            data type, optional.
            uses default if unspecified.
        interval : {'daily', 'hourly', 'minute', 'monthly', 'second'}
            data interval, optional.
            uses default if unspecified.
        Raises
        ------
        TimeseriesFactoryException
            if the url template is not a file url.
        NotImplementedError
            if write_file is not implemented by this factory.
        """
        if len(timeseries) == 0:
            # no data to put
            return
        if not self.urlTemplate.startswith('file://'):
            raise TimeseriesFactoryException('Only file urls are supported')
        channels = channels or self.channels
        type = type or self.type
        interval = interval or self.interval
        # defaults for station, sample spacing and time range come from
        # the first trace
        stats = timeseries[0].stats
        delta = stats.delta
        observatory = stats.station
        starttime = starttime or stats.starttime
        endtime = endtime or stats.endtime

        urlIntervals = Util.get_intervals(starttime=starttime,
                                          endtime=endtime,
                                          size=self.urlInterval)
        for urlInterval in urlIntervals:
            url = self._get_url(observatory=observatory,
                                date=urlInterval['start'],
                                type=type,
                                interval=interval,
                                channels=channels)
            url_data = timeseries.slice(
                starttime=urlInterval['start'],
                # subtract delta to omit the sample at end: `[start, end)`
                endtime=(urlInterval['end'] - delta))
            if len(url_data) == 0:
                # nothing to store for this interval; this also keeps the
                # url_data[0] lookup below from raising IndexError
                continue
            url_file = Util.get_file_from_url(url, createParentDirectory=True)
            # existing data file, merge new data into existing
            if os.path.isfile(url_file):
                try:
                    existing_data = Util.read_file(url_file)
                    existing_data = self.parse_string(
                        existing_data,
                        observatory=url_data[0].stats.station,
                        type=type,
                        interval=interval,
                        channels=channels)
                    # TODO: make parse_string return the correct location code
                    for trace in existing_data:
                        # make location codes match, just in case
                        matches = url_data.select(
                            network=trace.stats.network,
                            station=trace.stats.station,
                            channel=trace.stats.channel)
                        if len(matches) > 0:
                            # a channel present only in the existing file
                            # has no new trace to copy a location from
                            trace.stats.location = matches[0].stats.location
                    url_data = TimeseriesUtility.merge_streams(
                        existing_data, url_data)
                except IOError:
                    # no data yet
                    pass
                except NotImplementedError:
                    # factory only supports output
                    pass
            with open(url_file, 'wb') as fh:
                try:
                    self.write_file(fh, url_data, channels)
                except NotImplementedError:
                    raise NotImplementedError(
                        '"put_timeseries" not implemented')
Example #9
0
    def run_as_update(self, options, update_count=0):
        """Update output by filling the gaps that can now be produced.

        Parameters
        ----------
        options: object
            The parsed command line arguments, read as attributes
            (observatory, starttime, endtime, inchannels, outchannels,
            update_limit). Could in theory contain other options passed
            in by the controller. Its starttime and endtime are
            overwritten while gaps are processed.
        update_count: int
            Number of recursive steps already taken; compared against
            options.update_limit.

        Notes
        -----
        The existing output for the requested interval is fetched and its
            merged gaps are visited in order; when no output traces are
            returned the whole interval counts as one gap. A gap is
            skipped when the algorithm reports it cannot produce data
            from the input fetched for that gap; otherwise run is called
            with the gap's start/end time.
        If a fillable gap starts at the requested starttime, this method
            first calls itself for the preceding interval, so older
            periods are processed before newer ones.
        """
        # If an update_limit is set, make certain we don't step past it.
        limit_reached = (
            options.update_limit != 0 and
            update_count >= options.update_limit)
        if limit_reached:
            return
        print >> sys.stderr, 'checking gaps', \
                options.starttime, options.endtime
        algo = self._algorithm
        channels_in = options.inchannels or algo.get_input_channels()
        channels_out = options.outchannels or algo.get_output_channels()
        # fetch whatever output already exists for the interval
        current_output = self._get_output_timeseries(
                observatory=options.observatory,
                starttime=options.starttime,
                endtime=options.endtime,
                channels=channels_out)
        if len(current_output) == 0:
            # no output at all: treat the full interval as the only gap
            # (the third element, next sample time, is not used)
            merged_gaps = [[options.starttime, options.endtime, None]]
        else:
            gaps_by_channel = TimeseriesUtility.get_stream_gaps(
                    current_output)
            merged_gaps = TimeseriesUtility.get_merged_gaps(gaps_by_channel)
        for merged_gap in merged_gaps:
            first, last = merged_gap[0], merged_gap[1]
            gap_input = self._get_input_timeseries(
                    observatory=options.observatory,
                    starttime=first,
                    endtime=last,
                    channels=channels_in)
            if not algo.can_produce_data(
                    starttime=first, endtime=last, stream=gap_input):
                # input cannot fill this gap yet
                continue
            if first == options.starttime:
                # gap sits at the very start: look at the previous
                # interval before filling this one
                length = options.endtime - options.starttime
                previous_start = options.starttime - length
                previous_end = options.starttime - 1
                options.starttime = previous_start
                options.endtime = previous_end
                self.run_as_update(options, update_count + 1)
            # narrow the options to the gap and generate its data
            options.starttime = first
            options.endtime = last
            print >> sys.stderr, 'processing', \
                    options.starttime, options.endtime
            self.run(options)
    def put_timeseries(self, timeseries, starttime=None, endtime=None,
            channels=None, type=None, interval=None):
        """Store timeseries data.

        The data is split into one slice per url interval. Each slice is
        written to the file for its url; when that file already exists,
        its contents are parsed and combined with the new slice using
        TimeseriesUtility.merge_streams before the file is rewritten.
        Url intervals that contain no samples are skipped.

        Parameters
        ----------
        timeseries : obspy.core.Stream
            stream containing traces to store.
        starttime : UTCDateTime
            time of first sample in timeseries to store.
            uses first sample if unspecified.
        endtime : UTCDateTime
            time of last sample in timeseries to store.
            uses last sample if unspecified.
        channels : array_like
            list of channels to store, optional.
            uses default if unspecified.
        type : {'definitive', 'provisional', 'quasi-definitive', 'variation'}
            data type, optional.
            uses default if unspecified.
        interval : {'daily', 'hourly', 'minute', 'monthly', 'second'}
            data interval, optional.
            uses default if unspecified.
        Raises
        ------
        TimeseriesFactoryException
            if the url template is not a file url.
        NotImplementedError
            if write_file is not implemented by this factory.
        """
        if len(timeseries) == 0:
            # no data to put
            return
        if not self.urlTemplate.startswith('file://'):
            raise TimeseriesFactoryException('Only file urls are supported')
        channels = channels or self.channels
        type = type or self.type
        interval = interval or self.interval
        # defaults for station, sample spacing and time range come from
        # the first trace
        stats = timeseries[0].stats
        delta = stats.delta
        observatory = stats.station
        starttime = starttime or stats.starttime
        endtime = endtime or stats.endtime

        urlIntervals = Util.get_intervals(
                starttime=starttime,
                endtime=endtime,
                size=self.urlInterval)
        for urlInterval in urlIntervals:
            url = self._get_url(
                    observatory=observatory,
                    date=urlInterval['start'],
                    type=type,
                    interval=interval,
                    channels=channels)
            url_data = timeseries.slice(
                    starttime=urlInterval['start'],
                    # subtract delta to omit the sample at end: `[start, end)`
                    endtime=(urlInterval['end'] - delta))
            if len(url_data) == 0:
                # nothing to store for this interval; this also keeps the
                # url_data[0] lookup below from raising IndexError
                continue
            url_file = Util.get_file_from_url(url, createParentDirectory=True)
            # existing data file, merge new data into existing
            if os.path.isfile(url_file):
                try:
                    existing_data = Util.read_file(url_file)
                    existing_data = self.parse_string(existing_data,
                            observatory=url_data[0].stats.station,
                            type=type,
                            interval=interval,
                            channels=channels)
                    # TODO: make parse_string return the correct location code
                    for trace in existing_data:
                        # make location codes match, just in case
                        matches = url_data.select(
                                network=trace.stats.network,
                                station=trace.stats.station,
                                channel=trace.stats.channel)
                        if len(matches) > 0:
                            # a channel present only in the existing file
                            # has no new trace to copy a location from
                            trace.stats.location = matches[0].stats.location
                    url_data = TimeseriesUtility.merge_streams(
                            existing_data, url_data)
                except IOError:
                    # no data yet
                    pass
                except NotImplementedError:
                    # factory only supports output
                    pass
            with open(url_file, 'wb') as fh:
                try:
                    self.write_file(fh, url_data, channels)
                except NotImplementedError:
                    raise NotImplementedError(
                            '"put_timeseries" not implemented')