def test_incremental_df_right_closed(self):
    """Right-closed gaussian window over a monotonically increasing series.

    With window_width=3 and closed_option='right', the rolling min at each
    row should trail the raw value, matching ``ground_truth`` for the first
    rows (index 0 is skipped because the first window is incomplete).
    """
    length = 100
    data = list(range(length))
    df = _make_df_with_one_col(data)
    params = dku_timeseries.WindowAggregatorParams(window_width=3,
                                                   closed_option='right',
                                                   window_type='gaussian')
    window_aggregator = dku_timeseries.WindowAggregator(params)
    output_df = window_aggregator.compute(df, TIME_COL)
    ground_truth = [0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8]
    # NOTE(review): the min value is reused as a row index here
    # (output_df[DATA_COL][x]); this works because the data column holds
    # 0..length-1, so value == index — confirm this is intentional.
    for x, y in zip(output_df[DATA_COL + '_min'][1:], ground_truth[1:]):
        assert output_df[DATA_COL][x] == y
def test_extrema_without_neighbors(self):
    """Extrema extraction with a near-zero window yields the extremum alone.

    A milliseconds window captures no neighboring rows, so the output holds
    only the datetime column and the extremum value — no aggregated stats.
    """
    series_values = list(range(100))
    input_df = _make_df_with_one_col(series_values)
    window_params = dku_timeseries.WindowAggregatorParams(window_unit='milliseconds')
    extrema_params = dku_timeseries.ExtremaExtractorParams(
        window_aggregator=dku_timeseries.WindowAggregator(window_params))
    extractor = dku_timeseries.ExtremaExtractor(extrema_params)

    result = extractor.compute(input_df, TIME_COL, DATA_COL)

    # only have DATE_TIME col and DATA_COL of the extrema, no stats because no neighbors
    assert result.shape == (1, 2)
    assert result[DATA_COL][0] == 99
def test_group_window_time_unit(self):
    """Grouped window aggregation: each group is windowed independently.

    Builds two groups with different lengths/start times but the same
    1-second period; with a left-closed width-3 window the rolling min of
    each group should follow the same leading pattern (NaN first, then the
    trailing minima).
    """
    start_time_1 = pd.Timestamp('20190131 01:59:00').tz_localize('CET')
    start_time_2 = pd.Timestamp('20190131 02:00:00').tz_localize('CET')
    start_time_list = [start_time_1, start_time_2]
    len1 = 100
    len2 = 10
    data1 = range(len1)
    data2 = range(len2)
    data_list = [data1, data2]
    period1 = pd.DateOffset(seconds=1)
    period2 = pd.DateOffset(seconds=1)
    period_list = [period1, period2]
    df_list = []
    for group_id, data, period, start_time in zip(range(len(data_list)),
                                                  data_list, period_list,
                                                  start_time_list):
        group_name = 'group_{}'.format(group_id)
        temp_df = _make_df_with_one_col(data, period=period,
                                        start_time=start_time)
        temp_df.loc[:, GROUP_COL] = group_name
        df_list.append(temp_df)
    df = pd.concat(df_list, axis=0)
    params = dku_timeseries.WindowAggregatorParams(window_width=3,
                                                   closed_option='left',
                                                   window_type=None)
    window_aggregator = dku_timeseries.WindowAggregator(params)
    output_df = window_aggregator.compute(df, datetime_column=TIME_COL,
                                          groupby_columns=[GROUP_COL])
    # np.nan (not np.NaN, which was removed in NumPy 2.0)
    ground_truth = [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]
    output_0 = output_df.groupby(GROUP_COL).get_group(
        'group_0').data_col_min.values[:10]
    # first entry is NaN (no left-closed window yet); compare separately
    # because NaN != NaN under array_equal element comparison semantics
    assert math.isnan(output_0[0])
    assert np.array_equal(output_0[1:], ground_truth[1:])
    output_1 = output_df.groupby(GROUP_COL).get_group(
        'group_1').data_col_min.values[:10]
    assert math.isnan(output_1[0])
    assert np.array_equal(output_1[1:], ground_truth[1:])
def test_group_extrema_without_neighbors(self):
    """Grouped extrema extraction: one extremum row per group, no stats.

    Two groups of different lengths are concatenated; a milliseconds window
    has no neighbors, so the output has one row per group containing only
    the datetime, the extremum value, and the group label.
    """
    group_specs = [
        # (data, period, start time)
        (range(100), pd.DateOffset(seconds=1),
         pd.Timestamp('20190131 01:59:00').tz_localize('CET')),
        (range(10), pd.DateOffset(seconds=1),
         pd.Timestamp('20190131 02:00:00').tz_localize('CET')),
    ]
    frames = []
    for idx, (values, offset, start) in enumerate(group_specs):
        frame = _make_df_with_one_col(values, period=offset, start_time=start)
        frame[GROUP_COL] = 'group_{}'.format(idx)
        frames.append(frame)
    df = pd.concat(frames, axis=0)

    window_aggregator = dku_timeseries.WindowAggregator(
        dku_timeseries.WindowAggregatorParams(window_unit='milliseconds'))
    extractor = dku_timeseries.ExtremaExtractor(
        dku_timeseries.ExtremaExtractorParams(window_aggregator=window_aggregator))
    output_df = extractor.compute(df, TIME_COL, DATA_COL,
                                  groupby_columns=[GROUP_COL])

    assert output_df.shape == (2, 3)
    assert np.array_equal(output_df[DATA_COL], [99, 9])
def _make_window_aggregator(window_width=1):
    """Build a WindowAggregator test fixture.

    The two original definitions were duplicates — the zero-arg version was
    immediately shadowed by this parameterized one, so they are merged.

    :param window_width: width forwarded to the aggregator params
        (default 1 keeps zero-arg calls working).
    """
    params = _make_window_aggregator_params(window_width)
    return dku_timeseries.WindowAggregator(params)