def test_long_format_numerical(self, long_df_numerical, params, recipe_config, threshold_dict, datetime_column):
    """Run the restrictor on long-format data grouped by a numerical ``country`` column.

    Checks both the dates that are kept and the ``country`` identifier attached
    to each kept row.
    """
    restrictor = IntervalRestrictor(params)
    result = restrictor.compute(
        long_df_numerical,
        datetime_column,
        threshold_dict,
        groupby_columns=["country"],
    )

    expected_dates = pd.DatetimeIndex([
        '1959-01-01T00:00:00.000000000',
        '1959-01-02T00:00:00.000000000',
        '1959-01-02T00:00:00.000000000',
        '1959-01-03T00:00:00.000000000',
    ])
    expected_countries = np.array([1, 1, 2, 2])

    np.testing.assert_array_equal(result["Date"].values, expected_dates)
    np.testing.assert_array_equal(result["country"].values, expected_countries)
def test_zero_deviation_without_1st_row(self, edge_df_without_1st_row, config, threshold_dict, datetime_column):
    """Check the output boundaries and size for the ``edge_df_without_1st_row`` fixture.

    The output must hold exactly four rows, starting on 2020-07-03 and with
    2020-07-08 at index label 3.
    """
    restrictor = IntervalRestrictor(get_interval_restriction_params(config))
    result = restrictor.compute(edge_df_without_1st_row, datetime_column, threshold_dict)

    first_date = result.loc[0, datetime_column]
    fourth_date = result.loc[3, datetime_column]

    assert first_date == pd.Timestamp("2020-07-03")
    assert fourth_date == pd.Timestamp("2020-07-08")
    assert len(result.index) == 4
def test_empty_identifiers(self, df, params, recipe_config, threshold_dict, datetime_column):
    """Check that ``compute`` yields a (4, 5) frame for every way of passing no identifiers.

    ``groupby_columns`` is passed as an empty list, omitted entirely, and
    passed as ``None``, in that order.
    """
    restrictor = IntervalRestrictor(params)

    # Same order as the three explicit calls this loop replaces.
    no_identifier_variants = (
        {"groupby_columns": []},
        {},
        {"groupby_columns": None},
    )
    for extra_kwargs in no_identifier_variants:
        result = restrictor.compute(df, datetime_column, threshold_dict, **extra_kwargs)
        assert result.shape == (4, 5)
def test_mix_identifiers(self, long_df_4, params, recipe_config, threshold_dict, datetime_column):
    """Run the restrictor grouped by ``country``, ``item`` and ``store`` and check the kept dates."""
    identifiers = ["country", "item", "store"]
    restrictor = IntervalRestrictor(params)
    result = restrictor.compute(
        long_df_4,
        datetime_column,
        threshold_dict,
        groupby_columns=identifiers,
    )

    expected_dates = pd.DatetimeIndex([
        '2020-01-31T00:00:00.000000000',
        '2020-02-29T00:00:00.000000000',
        '2020-02-29T00:00:00.000000000',
        '2020-01-31T00:00:00.000000000',
        '2020-01-31T00:00:00.000000000',
        '2020-02-29T00:00:00.000000000',
        '2020-02-29T00:00:00.000000000',
    ])
    np.testing.assert_array_equal(result["Date"].values, expected_dates)
def test_zero_deviation_edges(self, edge_df, config, threshold_dict, datetime_column):
    """Check that the recipe properly handles the first and the last rows ([ch54733]).

    The output must hold seven rows, with 2020-07-01 at index label 0 and
    2020-07-12 at index label 6.
    """
    restrictor = IntervalRestrictor(get_interval_restriction_params(config))
    result = restrictor.compute(edge_df, datetime_column, threshold_dict)

    assert len(result.index) == 7

    first_date = result.loc[0, datetime_column]
    last_date = result.loc[6, datetime_column]
    assert first_date == pd.Timestamp("2020-07-01")
    assert last_date == pd.Timestamp("2020-07-12")
def test_day(self, config, threshold_dict, columns):
    """Run the restrictor with ``time_unit`` set to ``"days"`` on a ``"W"`` frame from ``get_df_DST``."""
    config["time_unit"] = "days"
    restrictor = IntervalRestrictor(get_interval_restriction_params(config))

    input_df = get_df_DST("W", columns)
    result = restrictor.compute(input_df, columns.date, threshold_dict)

    expected_dates = pd.DatetimeIndex([
        '2019-02-03T00:59:00.000000000',
        '2019-02-10T00:59:00.000000000',
        '2019-02-17T00:59:00.000000000',
        '2019-02-24T00:59:00.000000000',
    ])
    actual_dates = result[columns.date].values
    np.testing.assert_array_equal(expected_dates, actual_dates)
def test_microseconds(self, config, threshold_dict, columns):
    """Run the restrictor with ``time_unit`` set to ``"microseconds"`` on a ``"U"`` frame.

    All four timestamps must be kept, and every row must carry
    ``interval_id`` equal to the string ``"0"``.
    """
    config["time_unit"] = "microseconds"
    restrictor = IntervalRestrictor(get_interval_restriction_params(config))

    input_df = get_df_DST("U", columns)
    result = restrictor.compute(input_df, columns.date, threshold_dict)

    expected_dates = pd.DatetimeIndex([
        '2019-01-31T00:59:00.000000000',
        '2019-01-31T00:59:00.000001000',
        '2019-01-31T00:59:00.000002000',
        '2019-01-31T00:59:00.000003000',
    ])
    actual_dates = result[columns.date].values
    np.testing.assert_array_equal(expected_dates, actual_dates)

    interval_ids = result["interval_id"].values
    assert np.all(interval_ids == "0")
def test_zero_deviation_annual_edges(self, annual_edge_df, config, threshold_dict, datetime_column):
    """Check edge handling on the ``annual_edge_df`` fixture.

    Two things are verified:

    1. ``_initialize_edges`` on a date-indexed, sorted copy of the fixture
       produces an index running from 2010-12-31 to 2022-01-02.
    2. ``compute`` on the untouched fixture returns seven rows, with
       2011-01-01 at index label 0 and 2022-01-01 at index label 6.
    """
    params = get_interval_restriction_params(config)
    interval_restrictor = IntervalRestrictor(params)

    # Work on a copy so the fixture passed to ``compute`` below stays untouched.
    df_test = annual_edge_df.copy()
    # Plain column assignment always replaces the column, so it ends up with a
    # datetime dtype on every pandas version. ``df.loc[:, col] = ...`` writes
    # in place on pandas >= 2.0 and can keep the column's original dtype.
    df_test[datetime_column] = pd.to_datetime(df_test[datetime_column])
    df_test = df_test.set_index(datetime_column).sort_index()

    df_initialized = interval_restrictor._initialize_edges(df_test)
    assert df_initialized.index[0] == pd.Timestamp("2010-12-31")
    assert df_initialized.index[-1] == pd.Timestamp("2022-01-02")

    output_df = interval_restrictor.compute(annual_edge_df, datetime_column, threshold_dict)
    assert len(output_df.index) == 7
    assert output_df.loc[0, datetime_column] == pd.Timestamp("2011-01-01")
    assert output_df.loc[6, datetime_column] == pd.Timestamp("2022-01-01")
format='timeseries-preparation plugin %(levelname)s - %(message)s') check_python_version() # --- Setup (input_dataset, output_dataset) = get_input_output() recipe_config = get_recipe_config() input_dataset_columns = [ column["name"] for column in input_dataset.read_schema() ] check_time_column_parameter(recipe_config, input_dataset_columns) datetime_column = recipe_config.get('datetime_column') value_column = recipe_config.get('value_column') min_threshold = recipe_config.get('min_threshold') max_threshold = recipe_config.get('max_threshold') threshold_dict = {value_column: (min_threshold, max_threshold)} groupby_columns = check_and_get_groupby_columns(recipe_config, input_dataset_columns) params = get_interval_restriction_params(recipe_config) # --- Run df = input_dataset.get_dataframe() interval_restrictor = IntervalRestrictor(params) output_df = interval_restrictor.compute(df, datetime_column, threshold_dict, groupby_columns=groupby_columns) # --- Write output output_dataset.write_with_schema(output_df)
def test_segment_beginning(self, edge_df_segment, config, threshold_dict, datetime_column):
    """Check the start of the output for the ``edge_df_segment`` fixture.

    The first four rows must carry ``interval_id`` equal to the string
    ``"0"``, and the row at index label 0 must be dated 2020-07-01.
    """
    restrictor = IntervalRestrictor(get_interval_restriction_params(config))
    result = restrictor.compute(edge_df_segment, datetime_column, threshold_dict)

    leading_ids = result["interval_id"].values[:4]
    assert np.all(leading_ids == "0")

    first_date = result.loc[0, datetime_column]
    assert first_date == pd.Timestamp("2020-07-01")