Beispiel #1
0
 def test_monthly_causal(self, monthly_df, recipe_config):
     """A causal triangular window on monthly data keeps the frame shape."""
     recipe_config["causal_window"] = True
     recipe_config["window_type"] = "triang"
     aggregator = WindowAggregator(get_params(recipe_config))
     result = aggregator.compute(monthly_df,
                                 recipe_config.get('datetime_column'))
     assert result.shape == (6, 7)
Beispiel #2
0
 def test_annual_no_causal(self, annual_df, recipe_config):
     """A non-causal window over yearly data: shape plus rolling sums."""
     recipe_config["window_unit"] = "years"
     aggregator = WindowAggregator(get_params(recipe_config))
     result = aggregator.compute(annual_df,
                                 recipe_config.get('datetime_column'))
     assert result.shape == (6, 4)
     # NaN at both edges where the window does not fully cover the series.
     expected_sums = np.array([np.nan, 17, 15, 11, 8, np.nan])
     np.testing.assert_array_equal(result.value1_sum, expected_sums)
Beispiel #3
0
 def test_monthly_no_causal(self, monthly_df, recipe_config):
     """A non-causal window on monthly data: shape plus rolling sums."""
     aggregator = WindowAggregator(get_params(recipe_config))
     result = aggregator.compute(monthly_df,
                                 recipe_config.get('datetime_column'))
     assert result.shape == (6, 7)
     # NaN at both edges where the window does not fully cover the series.
     expected_sums = np.array([np.nan, 17, 15, 11, 8, np.nan])
     np.testing.assert_array_equal(result.value1_sum.values, expected_sums)
Beispiel #4
0
 def test_weekly_no_causal(self, weekly_df, recipe_config):
     """A non-causal window on weekly data: NaN edges around the valid sums."""
     aggregator = WindowAggregator(get_params(recipe_config))
     result = aggregator.compute(weekly_df,
                                 recipe_config.get('datetime_column'))
     assert result.shape == (9, 4)
     # Three incomplete windows on each side yield NaN.
     expected_sums = np.array([np.nan] * 3 + [27, 26, 21] + [np.nan] * 3)
     np.testing.assert_array_equal(result.value1_sum.values, expected_sums)
Beispiel #5
0
 def test_long_format_numerical(self, long_df_numerical, params,
                                recipe_config, columns):
     """Grouping on a numerical identifier column preserves the group labels."""
     aggregator = WindowAggregator(params)
     result = aggregator.compute(long_df_numerical,
                                 recipe_config.get('datetime_column'),
                                 groupby_columns=["country"])
     expected_labels = np.array([1, 1, 1, 1, 2, 2, 2, 2])
     np.testing.assert_array_equal(result.country.values, expected_labels)
Beispiel #6
0
 def test_annual_causal(self, annual_df, recipe_config):
     """A causal triangular window with a yearly unit: averages and shape."""
     recipe_config["causal_window"] = True
     recipe_config["window_type"] = "triang"
     recipe_config["window_unit"] = "years"
     aggregator = WindowAggregator(get_params(recipe_config))
     result = aggregator.compute(annual_df,
                                 recipe_config.get('datetime_column'))
     # The causal window needs a full lookback, so the first rows are NaN.
     expected_avg = np.array([np.nan, np.nan, np.nan, 6.5, 4.75, 3.25])
     np.testing.assert_array_equal(result.value1_avg, expected_avg)
     assert result.shape == (6, 4)
Beispiel #7
0
 def test_empty_identifiers(self, df, params, recipe_config, columns):
     """Empty list, omitted, and None groupby specifications behave the same."""
     aggregator = WindowAggregator(params)
     dt_col = recipe_config.get('datetime_column')
     result = aggregator.compute(df, dt_col, groupby_columns=[])
     assert result.shape == (4, 5)
     result = aggregator.compute(df, dt_col)
     assert result.shape == (4, 5)
     result = aggregator.compute(df, dt_col, groupby_columns=None)
     assert result.shape == (4, 5)
Beispiel #8
0
 def test_weekly_causal(self, weekly_df, recipe_config):
     """A causal triangular window on weekly data: leading NaNs, then sums."""
     recipe_config["causal_window"] = True
     recipe_config["window_type"] = "triang"
     aggregator = WindowAggregator(get_params(recipe_config))
     result = aggregator.compute(weekly_df,
                                 recipe_config.get('datetime_column'))
     # Seven rows of warm-up before the causal window produces values.
     expected_sums = np.array([np.nan] * 7 + [15.25, 13.25])
     np.testing.assert_array_equal(result.value1_sum, expected_sums)
     assert result.shape == (9, 4)
Beispiel #9
0
 def test_mix_identifiers(self, long_df_4, params, recipe_config, columns):
     """Three groupby identifiers: the datetime column keeps per-group dates."""
     aggregator = WindowAggregator(params)
     dt_col = recipe_config.get('datetime_column')
     result = aggregator.compute(long_df_4, dt_col,
                                 groupby_columns=["country", "item", "store"])
     expected_dates = pd.DatetimeIndex([
         '2020-01-31', '2020-02-29', '2020-02-29', '2020-01-31',
         '2020-01-31', '2020-02-29', '2020-01-31', '2020-02-29'
     ])
     np.testing.assert_array_equal(result[dt_col].values, expected_dates)
Beispiel #10
0
 def test_weeks(self, recipe_config, columns):
     """A weekly window unit on a DST-crossing series keeps the timestamps."""
     recipe_config["window_unit"] = "weeks"
     aggregator = WindowAggregator(get_params(recipe_config))
     input_df = get_df_DST("W", columns)
     result = aggregator.compute(input_df, columns.date)
     assert result.shape == (6, 7)
     expected_dates = pd.DatetimeIndex([
         '2019-02-03T00:59:00', '2019-02-10T00:59:00',
         '2019-02-17T00:59:00', '2019-02-24T00:59:00',
         '2019-03-03T00:59:00', '2019-03-10T00:59:00'
     ])
     np.testing.assert_array_equal(result[columns.date].values,
                                   expected_dates)
Beispiel #11
0
    def test_nanoseconds(self, recipe_config, columns):
        """A nanosecond window unit: output timestamps step by one nanosecond."""
        recipe_config["window_unit"] = "nanoseconds"
        aggregator = WindowAggregator(get_params(recipe_config))
        input_df = get_df_DST("N", columns)
        result = aggregator.compute(input_df, columns.date)

        assert result.shape == (6, 7)
        # Six consecutive nanoseconds starting at 2019-01-31T00:59:00.
        base = '2019-01-31T00:59:00.00000000'
        expected_dates = pd.DatetimeIndex([base + str(i) for i in range(6)])
        np.testing.assert_array_equal(result[columns.date].values,
                                      expected_dates)
Beispiel #12
0
 def test_two_identifiers(self, long_df_2, params, recipe_config, columns):
     """Two groupby identifiers: the same two dates repeat for each group."""
     aggregator = WindowAggregator(params)
     dt_col = recipe_config.get('datetime_column')
     result = aggregator.compute(long_df_2, dt_col,
                                 groupby_columns=["country", "item"])
     expected_dates = pd.DatetimeIndex(['1959-01-31', '1959-02-28'] * 3)
     np.testing.assert_array_equal(result[dt_col].values, expected_dates)
Beispiel #13
0
 def test_year_start(self, annual_start_df, recipe_config):
     """Year-start timestamps are preserved when the window unit is years."""
     recipe_config["window_unit"] = "years"
     aggregator = WindowAggregator(get_params(recipe_config))
     result = aggregator.compute(annual_start_df,
                                 recipe_config.get('datetime_column'))
     assert result.shape == (6, 4)
     expected_dates = pd.DatetimeIndex(
         ['{}-01-01'.format(year) for year in range(2015, 2021)])
     np.testing.assert_array_equal(result.Date.values, expected_dates)
Beispiel #14
0
 def test_long_format(self, long_df, params, recipe_config, columns):
     """Grouped window aggregation: per-group values and group labels."""
     aggregator = WindowAggregator(params)
     result = aggregator.compute(long_df,
                                 recipe_config.get('datetime_column'),
                                 groupby_columns=[columns.category])
     # Rounded to 2 decimals so the comparison is exact despite float math.
     expected_values = np.array(
         [np.nan, 315.58, 315.98, 316.25, np.nan, 345., 289.5, 226.33])
     np.testing.assert_array_equal(
         np.round(result[columns.aggregation].values, 2), expected_values)
     expected_labels = np.array(['first'] * 4 + ['second'] * 4)
     np.testing.assert_array_equal(result.country.values, expected_labels)
Beispiel #15
0
 def test_month_start(self, monthly_start_df, recipe_config):
     """Month-start timestamps survive a width-1 average/retrieve window."""
     recipe_config["window_width"] = 1
     recipe_config["aggregation_types"] = [u'average', 'retrieve']
     aggregator = WindowAggregator(get_params(recipe_config))
     result = aggregator.compute(monthly_start_df,
                                 recipe_config.get('datetime_column'))
     assert result.shape == (6, 5)
     expected_dates = pd.DatetimeIndex(
         ['2015-{:02d}-01'.format(month) for month in range(1, 7)])
     np.testing.assert_array_equal(result.Date.values, expected_dates)
Beispiel #16
0
    def test_long_format_no_causal(self, long_df, params_no_causal,
                                   recipe_config, columns):
        """Non-causal grouped aggregation: rounded values and group labels."""
        aggregator = WindowAggregator(params_no_causal)
        result = aggregator.compute(long_df,
                                    recipe_config.get('datetime_column'),
                                    groupby_columns=["country"])

        # Rounded to 2 decimals so the comparison is exact despite float math.
        expected_values = np.array(
            [np.nan, 316.25, 316.46, np.nan, np.nan, 226.33, 211., np.nan])
        np.testing.assert_array_equal(
            np.round(result[columns.aggregation].values, 2), expected_values)
        expected_labels = np.array(['first'] * 4 + ['second'] * 4)
        np.testing.assert_array_equal(result.country.values, expected_labels)
Beispiel #17
0
    def test_invalid_frequencies(self, annual_df, recipe_config):
        """A window narrower than the series frequency raises, except for window_type 'none'."""
        dt_col = recipe_config.get('datetime_column')

        # Default (non-causal) window is narrower than the yearly frequency.
        aggregator = WindowAggregator(get_params(recipe_config))
        with pytest.raises(Exception) as err:
            _ = aggregator.compute(annual_df, dt_col)
        assert "smaller than the timeseries frequency" in str(err.value)

        # A causal triangular window fails the same way.
        recipe_config["causal_window"] = True
        recipe_config["window_type"] = "triang"
        aggregator = WindowAggregator(get_params(recipe_config))
        with pytest.raises(Exception) as err:
            _ = aggregator.compute(annual_df, dt_col)
        assert "smaller than the timeseries frequency" in str(err.value)

        # window_type "none" succeeds but produces all-NaN sums.
        recipe_config["window_type"] = "none"
        aggregator = WindowAggregator(get_params(recipe_config))
        result = aggregator.compute(annual_df, dt_col)
        np.testing.assert_array_equal(result.value1_sum, np.nan * np.ones(6))
from recipe_config_loading import check_time_column_parameter, check_and_get_groupby_columns, check_python_version, get_windowing_params

# Recipe entry point: read the input dataset, apply the configured window
# aggregation, and write the result with its schema.
# NOTE(review): `logging`, `get_input_output`, `get_recipe_config` and
# `WindowAggregator` are not imported in this chunk — presumably imported
# earlier in the file; verify.
logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO,
    format='timeseries-preparation plugin %(levelname)s - %(message)s')

# Fail fast if the interpreter version is unsupported.
check_python_version()

# --- Setup
(input_dataset, output_dataset) = get_input_output()
recipe_config = get_recipe_config()
input_dataset_columns = [
    column["name"] for column in input_dataset.read_schema()
]
# Validate the configured datetime column against the dataset schema.
check_time_column_parameter(recipe_config, input_dataset_columns)
datetime_column = recipe_config.get('datetime_column')
# Groupby columns are optional; the checker returns whatever is configured.
groupby_columns = check_and_get_groupby_columns(recipe_config,
                                                input_dataset_columns)
params = get_windowing_params(recipe_config)

# --- Run
df = input_dataset.get_dataframe()
window_aggregator = WindowAggregator(params)
output_df = window_aggregator.compute(df,
                                      datetime_column,
                                      groupby_columns=groupby_columns)

# --- Write output
output_dataset.write_with_schema(output_df)