def enumerate_events(*timeseries_list):
    """Yield, day by day, one tuple holding an event of each time series.

    Parameters:
      *timeseries_list*
        time series objects; each must offer an events() method that
        returns an iterable of events, where event[0] is the date of the
        event

    Iteration starts at the earliest first event of the given time series
    and advances one day at a time until every time series is exhausted.
    For each of those days this generator yields a tuple with one entry per
    time series, in argument order: the event of that time series when its
    pending event falls on that day, and the placeholder event (day, 0.0)
    otherwise.

    Days are matched through isocalendar(), so the dates can be
    datetime.date as well as datetime.datetime objects; a time component is
    ignored when matching. The dates of all time series have to be mutually
    comparable, so do not mix the two types in one call.

    Each time series has to deliver its events in chronological order with
    at most one event per day: a pending event that lies before the current
    day never matches and is never skipped, which keeps this generator
    running forever.

    Nothing is yielded when none of the time series contains an event.

    """
    # iter() allows events() to return any iterable instead of only an
    # iterator.
    events_list = [iter(timeseries.events())
                   for timeseries in timeseries_list]
    earliest_event_list = [next(events, None) for events in events_list]

    # The start is taken from the events themselves instead of from a
    # datetime.max sentinel, which cannot be compared to plain dates.
    starts = [event[0] for event in earliest_event_list if event is not None]
    if not starts:
        # none of the time series contains an event and we stop immediately
        return
    next_start = min(starts)

    timeseries_count = len(timeseries_list)
    while any(event is not None for event in earliest_event_list):
        current_day = next_start.isocalendar()
        to_yield = [(next_start, 0.0)] * timeseries_count
        for index, earliest_event in enumerate(earliest_event_list):
            if (earliest_event is not None and
                    earliest_event[0].isocalendar() == current_day):
                to_yield[index] = earliest_event
                earliest_event_list[index] = next(events_list[index], None)
        yield tuple(to_yield)
        next_start = next_start + timedelta(1)
def split_timeseries(timeseries):
    """Split a time series into a non-positive and a non-negative part.

    Parameters:
      *timeseries*
        time series whose events() method delivers (date, value) events

    This function returns the 2-tuple (non-positive, non-negative) of two
    new SparseTimeseriesStub objects. Both receive a value for every date
    of the given time series: the first one receives the original value
    when that value is negative and 0 otherwise, the second one receives
    the original value when that value is positive and 0 otherwise.

    """
    non_positive = SparseTimeseriesStub()
    non_negative = SparseTimeseriesStub()
    for date, value in timeseries.events():
        if value > 0:
            negative_part, positive_part = 0, value
        elif value < 0:
            negative_part, positive_part = value, 0
        else:
            # neither positive nor negative: zero in both parts
            negative_part, positive_part = 0, 0
        non_positive.add_value(date, negative_part)
        non_negative.add_value(date, positive_part)
    return (non_positive, non_negative)
def multiply_timeseries(timeseries, value):
    """Return a new time series with every event value multiplied by value.

    Parameters:
      *timeseries*
        time series whose events() method delivers events with the date
        at index 0 and the value at index 1
      *value*
        factor to multiply each event value with

    The result is a new SparseTimeseriesStub that holds a value for the
    date of each event of the given time series.

    """
    scaled = SparseTimeseriesStub()
    for event in timeseries.events():
        event_date = event[0]
        scaled_value = event[1] * value
        scaled.add_value(event_date, scaled_value)
    return scaled
def map_timeseries(timeseries, map_function):
    """Return a new time series with map_function applied to each value.

    Parameters:
      *timeseries*
        time series whose events() method delivers (time, value) events
      *map_function*
        callable that is called with a single event value and that returns
        the value to store

    The result is a new SparseTimeseriesStub that holds the mapped value
    for the time of each event of the given time series.

    """
    mapped = SparseTimeseriesStub()
    for event_time, event_value in timeseries.events():
        mapped_value = map_function(event_value)
        mapped.add_value(event_time, mapped_value)
    return mapped
def cumulative_event_values(timeseries, reset_period, period='month',
                            multiply=1, time_shift=0):
    """Yield running totals of the event values, restarted each reset period.

    Parameters:
      *timeseries*
        time series whose events() method delivers (date, value) events
      *reset_period*
        'day', 'month', 'quarter', 'hydro_year' or 'year'; the running
        total restarts at zero for each new period of this kind
      *period*
        one of the same names; the event values are summed per period of
        this kind and a single item is yielded for each of them
      *multiply*
        factor applied to each yielded total
      *time_shift*
        number of days to add to each yielded date

    Each yielded item is the 2-tuple of

      - the group key, as computed by the _first_of_* helper of the period,
        shifted by time_shift days, and
      - the running total up to and including that group, times multiply.

    The events are grouped with itertools.groupby, which only combines
    consecutive events, so the events are expected to arrive ordered by
    period.

    Passing a name that is not in the list above raises a ValueError.

    """
    if reset_period == 'hydro_year' and period == 'year':
        # The rest of this function cannot handle this combination, so we
        # group by hydrological year instead.
        period = 'hydro_year'

    # A reset period that is shorter than the group period, for example a
    # month versus a quarter, could make the grouper return a date that
    # lies before the date of the resetter. To avoid that, the shorter
    # reset period also becomes the group period, so the user always sees
    # the reset.
    ordered_periods = ['day', 'month', 'quarter', 'hydro_year', 'year']
    reset_rank = ordered_periods.index(reset_period)
    period_rank = ordered_periods.index(period)
    if reset_rank < period_rank:
        period = reset_period

    first_of = {
        'day': _first_of_day,
        'month': _first_of_month,
        'quarter': _first_of_quarter,
        'hydro_year': _first_of_hydro_year,
        'year': _first_of_year,
    }
    reseter = first_of.get(reset_period)
    assert reseter is not None
    grouper = first_of.get(period)
    assert grouper is not None

    shift = timedelta(time_shift)
    for _, reset_events in itertools.groupby(timeseries.events(), reseter):
        running_total = 0
        for group_date, group_events in itertools.groupby(reset_events,
                                                          grouper):
            running_total += sum(amount for (_, amount) in group_events)
            yield (group_date + shift), running_total * multiply
def create_empty_timeseries(timeseries):
    """Return a TimeseriesStub that starts where the given time series starts.

    Parameters:
      *timeseries*
        time series whose events() method delivers an iterator of events
        with the date at index 0

    When the given time series has at least one event, the returned
    TimeseriesStub holds the single value 0.0 at the date of the first
    event. When the given time series has no events, the returned
    TimeseriesStub has no values added to it.

    """
    result = TimeseriesStub()
    first_event = next(timeseries.events(), None)
    if first_event is None:
        return result
    result.add_value(first_event[0], 0.0)
    return result
def cumulative_event_values(timeseries, reset_period, period='month',
                            multiply=1, time_shift=0):
    """Yield running totals of the event values, restarted each reset period.

    Parameters:
      *timeseries*
        time series whose events() method delivers (date, value) events
      *reset_period*
        'day', 'month', 'quarter', 'hydro_year' or 'year'; the running
        total restarts at zero for each new period of this kind
      *period*
        one of the same names; the event values are summed per period of
        this kind and a single item is yielded for each of them
      *multiply*
        factor applied to each yielded total
      *time_shift*
        number of days to add to each yielded date

    Each yielded item is the 2-tuple of

      - the group key, as computed by the _first_of_* helper of the period,
        shifted by time_shift days, and
      - the running total up to and including that group, times multiply.

    The events are grouped with itertools.groupby, which only combines
    consecutive events, so the events are expected to arrive ordered by
    period.

    Passing a name that is not in the list above raises a ValueError.

    """
    if reset_period == 'hydro_year' and period == 'year':
        # The rest of this function cannot handle this combination, so we
        # group by hydrological year instead.
        period = 'hydro_year'

    # A reset period that is shorter than the group period, for example a
    # month versus a quarter, could make the grouper return a date that
    # lies before the date of the resetter. To avoid that, the shorter
    # reset period also becomes the group period, so the user always sees
    # the reset.
    ordered_periods = ['day', 'month', 'quarter', 'hydro_year', 'year']
    reset_rank = ordered_periods.index(reset_period)
    period_rank = ordered_periods.index(period)
    if reset_rank < period_rank:
        period = reset_period

    first_of = {
        'day': _first_of_day,
        'month': _first_of_month,
        'quarter': _first_of_quarter,
        'hydro_year': _first_of_hydro_year,
        'year': _first_of_year,
    }
    reseter = first_of.get(reset_period)
    assert reseter is not None
    grouper = first_of.get(period)
    assert grouper is not None

    shift = timedelta(time_shift)
    for _, reset_events in itertools.groupby(timeseries.events(), reseter):
        running_total = 0
        for group_date, group_events in itertools.groupby(reset_events,
                                                          grouper):
            running_total += sum(amount for (_, amount) in group_events)
            yield (group_date + shift), running_total * multiply
def grouped_event_values(timeseries, period, average=False):
    """Yield the total, or the average, of the event values per period.

    Parameters:
      *timeseries*
        time series whose events() method delivers (date, value) events
      *period*
        'day', 'month', 'quarter' or 'year'; any other name triggers an
        AssertionError
      *average*
        when True, yield the average of the values of each group instead
        of their sum

    Each yielded item is the 2-tuple (group key, result), where the group
    key is computed by the _first_of_* helper of the given period. The
    events are grouped with itertools.groupby, which only combines
    consecutive events.

    >>> ts = TimeseriesStub()  # empty timeseries
    >>> [i for i in grouped_event_values(ts, 'day')]
    []
    >>> [i for i in grouped_event_values(ts, 'month')]
    []
    >>> [i for i in grouped_event_values(ts, 'quarter')]
    []
    >>> [i for i in grouped_event_values(ts, 'year')]
    []
    >>> [i for i in grouped_event_values(ts, 'not_a_period')]
    Traceback (most recent call last):
       ...
    AssertionError

    """
    first_of_period = {
        'day': _first_of_day,
        'month': _first_of_month,
        'quarter': _first_of_quarter,
        'year': _first_of_year,
    }.get(period)
    assert first_of_period is not None

    grouped = itertools.groupby(timeseries.events(), first_of_period)
    for group_key, group_events in grouped:
        if not average:
            outcome = sum(amount for (_, amount) in group_events)
        else:
            # The group can only be iterated once, so we store its events
            # to be able to both sum and count them.
            stored_events = list(group_events)
            total = sum(amount for (_, amount) in stored_events)
            outcome = total / (1.0 * len(stored_events))
        yield group_key, outcome
def grouped_event_values(timeseries, period, average=False):
    """Yield the total, or the average, of the event values per period.

    Parameters:
      *timeseries*
        time series whose events() method delivers (date, value) events
      *period*
        'day', 'month', 'quarter' or 'year'; any other name triggers an
        AssertionError
      *average*
        when True, yield the average of the values of each group instead
        of their sum

    Each yielded item is the 2-tuple (group key, result), where the group
    key is computed by the _first_of_* helper of the given period. The
    events are grouped with itertools.groupby, which only combines
    consecutive events.

    >>> ts = TimeseriesStub()  # empty timeseries
    >>> [i for i in grouped_event_values(ts, 'day')]
    []
    >>> [i for i in grouped_event_values(ts, 'month')]
    []
    >>> [i for i in grouped_event_values(ts, 'quarter')]
    []
    >>> [i for i in grouped_event_values(ts, 'year')]
    []
    >>> [i for i in grouped_event_values(ts, 'not_a_period')]
    Traceback (most recent call last):
       ...
    AssertionError

    """
    first_of_period = {
        'day': _first_of_day,
        'month': _first_of_month,
        'quarter': _first_of_quarter,
        'year': _first_of_year,
    }.get(period)
    assert first_of_period is not None

    grouped = itertools.groupby(timeseries.events(), first_of_period)
    for group_key, group_events in grouped:
        if not average:
            outcome = sum(amount for (_, amount) in group_events)
        else:
            # The group can only be iterated once, so we store its events
            # to be able to both sum and count them.
            stored_events = list(group_events)
            total = sum(amount for (_, amount) in stored_events)
            outcome = total / (1.0 * len(stored_events))
        yield group_key, outcome
def enumerate_dict_events(timeseries_dict):
    """Yield, day by day, a dictionary with an event of each time series.

    Parameter:
      *timeseries_dict*
        dictionary where a value is
          - a time series or
          - a dictionary where **each** value is a time series

    Each time series must offer an events() method that returns an
    iterable of events, where event[0] is the date of the event.

    Iteration starts at the earliest first event of all time series,
    nested ones included, and advances one day at a time until every time
    series is exhausted. For each of those days this generator yields a
    new dictionary that

      - maps the key 'date' to that day, and
      - has the same keys as the given dictionary, nested in the same way,
        where each time series is replaced by its event when its pending
        event falls on that day, and by the placeholder event (day, 0.0)
        otherwise.

    The key of a nested dictionary without any time series is absent from
    the yielded dictionaries.

    Days are matched through isocalendar(), so the dates can be
    datetime.date as well as datetime.datetime objects; a time component is
    ignored when matching. The dates of all time series have to be mutually
    comparable, so do not mix the two types in one call.

    Each time series has to deliver its events in chronological order with
    at most one event per day: a pending event that lies before the current
    day never matches and is never skipped, which keeps this generator
    running forever.

    Nothing is yielded when none of the time series contains an event.

    """
    # Flatten the dictionary to two parallel lists: the key path of each
    # time series, (key,) or (key, nested key), and its event iterator.
    keys_list = []
    events_list = []
    for key, timeseries in timeseries_dict.items():
        # isinstance also accepts subclasses of dict as nested dictionary
        if isinstance(timeseries, dict):
            for key_nested, timeseries_nested in timeseries.items():
                keys_list.append((key, key_nested))
                events_list.append(iter(timeseries_nested.events()))
        else:
            keys_list.append((key,))
            events_list.append(iter(timeseries.events()))

    earliest_event_list = [next(events, None) for events in events_list]

    # The start has to take the first event of *every* time series into
    # account: starting after the first event of a time series would leave
    # that event pending forever.
    starts = [event[0] for event in earliest_event_list if event is not None]
    if not starts:
        # none of the time series contains an event and we stop immediately
        return
    next_start = min(starts)

    while any(event is not None for event in earliest_event_list):
        current_day = next_start.isocalendar()
        to_yield = {'date': next_start}
        for index, key_path in enumerate(keys_list):
            event = earliest_event_list[index]
            if event is not None and event[0].isocalendar() == current_day:
                earliest_event_list[index] = next(events_list[index], None)
            else:
                event = (next_start, 0.0)
            if len(key_path) == 1:
                to_yield[key_path[0]] = event
            else:
                to_yield.setdefault(key_path[0], {})[key_path[1]] = event
        yield to_yield
        next_start = next_start + timedelta(1)