def test_incremental_df_right_closed(self):
    """Rolling gaussian-weighted min with a right-closed window over a 0..99 ramp.

    With ``closed_option='right'`` the window includes the current row, so
    the running minimum trails the identity ramp as in ``ground_truth``.
    """
    length = 100
    data = list(range(length))
    df = _make_df_with_one_col(data)
    params = dku_timeseries.WindowAggregatorParams(window_width=3,
                                                   closed_option='right',
                                                   window_type='gaussian')
    window_aggregator = dku_timeseries.WindowAggregator(params)
    output_df = window_aggregator.compute(df, TIME_COL)
    ground_truth = [0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8]
    # DATA_COL holds the ramp 0..99, so indexing it with the computed min
    # value x yields x itself; the assert effectively checks x == y.
    # NOTE(review): this relies on data[i] == i — confirm it is intentional
    # rather than a mangled `assert x == y`.
    for x, y in zip(output_df[DATA_COL + '_min'][1:], ground_truth[1:]):
        assert output_df[DATA_COL][x] == y
# Ejemplo n.º 2
# 0
    def test_extrema_without_neighbors(self):
        """Extremum extraction with a zero-effective-width window.

        With a millisecond window there are no neighbor rows, so the output
        is a single row holding only the datetime and data columns (no
        neighbor statistics), and the maximum of the 0..99 ramp is 99.
        """
        n_rows = 100
        df = _make_df_with_one_col(list(range(n_rows)))

        aggregator = dku_timeseries.WindowAggregator(
            dku_timeseries.WindowAggregatorParams(window_unit='milliseconds'))
        extractor_params = dku_timeseries.ExtremaExtractorParams(
            window_aggregator=aggregator)
        extractor = dku_timeseries.ExtremaExtractor(extractor_params)
        result = extractor.compute(df, TIME_COL, DATA_COL)

        # only DATE_TIME col and DATA_COL of the extremum; no stats columns
        # because there are no neighbors
        assert result.shape == (1, 2)
        assert result[DATA_COL][0] == 99
    def test_group_window_time_unit(self):
        """Per-group rolling min (left-closed, width 3) over two groups.

        Two groups of different lengths share a 1-second period but start
        one minute apart; windowing must be applied independently per group,
        so both groups produce the same leading pattern.
        """
        start_time_1 = pd.Timestamp('20190131 01:59:00').tz_localize('CET')
        start_time_2 = pd.Timestamp('20190131 02:00:00').tz_localize('CET')
        start_time_list = [start_time_1, start_time_2]

        len1 = 100
        len2 = 10
        data1 = range(len1)
        data2 = range(len2)
        data_list = [data1, data2]

        period1 = pd.DateOffset(seconds=1)
        period2 = pd.DateOffset(seconds=1)
        period_list = [period1, period2]

        df_list = []
        for group_id, data, period, start_time in zip(range(len(data_list)),
                                                      data_list, period_list,
                                                      start_time_list):
            group_name = 'group_{}'.format(group_id)
            temp_df = _make_df_with_one_col(data,
                                            period=period,
                                            start_time=start_time)
            temp_df.loc[:, GROUP_COL] = group_name
            df_list.append(temp_df)

        df = pd.concat(df_list, axis=0)

        params = dku_timeseries.WindowAggregatorParams(window_width=3,
                                                       closed_option='left',
                                                       window_type=None)
        window_aggregator = dku_timeseries.WindowAggregator(params)
        output_df = window_aggregator.compute(df,
                                              datetime_column=TIME_COL,
                                              groupby_columns=[GROUP_COL])

        # np.nan, not np.NaN — the np.NaN alias was removed in NumPy 2.0.
        # First row is NaN: a left-closed window excludes the current row
        # and the first row has no lookback.
        ground_truth = [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]
        output_0 = output_df.groupby(GROUP_COL).get_group(
            'group_0').data_col_min.values[:10]
        assert math.isnan(output_0[0])
        assert np.array_equal(output_0[1:], ground_truth[1:])
        output_1 = output_df.groupby(GROUP_COL).get_group(
            'group_1').data_col_min.values[:10]
        assert math.isnan(output_1[0])
        assert np.array_equal(output_1[1:], ground_truth[1:])
# Ejemplo n.º 4
# 0
    def test_group_extrema_without_neighbors(self):
        """Per-group extremum extraction with no neighbor statistics.

        group_0 ramps 0..99 and group_1 ramps 0..9, so extraction yields one
        row per group with maxima 99 and 9 respectively.
        """
        starts = [
            pd.Timestamp('20190131 01:59:00').tz_localize('CET'),
            pd.Timestamp('20190131 02:00:00').tz_localize('CET'),
        ]
        lengths = [100, 10]
        periods = [pd.DateOffset(seconds=1), pd.DateOffset(seconds=1)]

        frames = []
        for idx, (length, period, start) in enumerate(
                zip(lengths, periods, starts)):
            frame = _make_df_with_one_col(range(length),
                                          period=period,
                                          start_time=start)
            frame[GROUP_COL] = 'group_{}'.format(idx)
            frames.append(frame)

        df = pd.concat(frames, axis=0)

        aggregator = dku_timeseries.WindowAggregator(
            dku_timeseries.WindowAggregatorParams(window_unit='milliseconds'))
        extractor = dku_timeseries.ExtremaExtractor(
            dku_timeseries.ExtremaExtractorParams(window_aggregator=aggregator))
        output_df = extractor.compute(df,
                                      TIME_COL,
                                      DATA_COL,
                                      groupby_columns=[GROUP_COL])
        # one extremum row per group: datetime, data value, group column
        assert output_df.shape == (2, 3)
        assert np.array_equal(output_df[DATA_COL], [99, 9])
# Ejemplo n.º 5
# 0
def _make_window_aggregator(window_width=1):
    """Build a WindowAggregator from test params.

    A preceding zero-argument definition with the same name was dead code —
    it was immediately shadowed by this parameterized version — so only this
    one is kept; calling with no argument still works via the default.

    :param window_width: window width forwarded to
        ``_make_window_aggregator_params``.
    :return: a ``dku_timeseries.WindowAggregator`` instance.
    """
    params = _make_window_aggregator_params(window_width)
    return dku_timeseries.WindowAggregator(params)