Ejemplo n.º 1
0
    def test_no_groupby_columns(self, config, dataset_columns):
        """Check the result when ``groupby_columns`` is empty, then absent.

        ``groupby_column`` is set to ``"country"`` in both cases, and exactly
        one identifier is expected back from the helper each time.
        """
        config["groupby_columns"] = []
        config["groupby_column"] = "country"
        # Case 1: the multi-column key is present but holds an empty list.
        assert len(check_and_get_groupby_columns(config, dataset_columns)) == 1

        # Case 2: the multi-column key is removed from the config entirely.
        del config["groupby_columns"]
        assert len(check_and_get_groupby_columns(config, dataset_columns)) == 1
Ejemplo n.º 2
0
    def test_check_and_get_groupby_columns(self, config):
        """Check the identifiers returned for the list key and the single key.

        With the ``config`` fixture as provided (its contents are defined
        outside this method), ``["country", "old"]`` is expected. Once
        ``groupby_columns`` is removed and ``groupby_column`` is set to
        ``"country"``, ``["country"]`` is expected.
        """
        available_columns = ["value1", "country", "old"]
        selected = check_and_get_groupby_columns(config, available_columns)
        assert selected == ["country", "old"]

        # Switch from the list-valued key to the single-valued one.
        del config["groupby_columns"]
        config["groupby_column"] = "country"
        selected = check_and_get_groupby_columns(config, available_columns)
        assert selected == ["country"]
Ejemplo n.º 3
0
    def test_invalid_groupby_columns(self, config, dataset_columns):
        """Check the ``ValueError`` messages raised for bad identifier configs.

        Three situations are exercised in order:

        1. ``groupby_columns`` contains an empty string (checked twice with
           the same config).
        2. ``groupby_columns`` is removed from the config.
        3. ``groupby_column`` is set to ``"not_ok"`` and the check runs
           against the ``dataset_columns`` fixture.
        """
        invalid_selection = "Invalid time series identifiers selection"
        missing_identifiers = "Long format is activated but no time series identifiers have been provided"
        restricted_dataset_columns = ["value1", "country"]

        def expect_value_error(columns, expected_message):
            # The helper must raise ValueError and mention the expected text.
            with pytest.raises(ValueError) as excinfo:
                check_and_get_groupby_columns(config, columns)
            assert expected_message in str(excinfo.value)

        config["groupby_columns"] = ["country", ""]
        # The same call is made twice against an unchanged config; both
        # attempts are expected to fail with the same message.
        for _ in range(2):
            expect_value_error(restricted_dataset_columns, invalid_selection)

        del config["groupby_columns"]
        expect_value_error(restricted_dataset_columns, missing_identifiers)

        config["groupby_column"] = "not_ok"
        expect_value_error(dataset_columns, invalid_selection)
from dataiku.customrecipe import get_recipe_config

from dku_timeseries import Resampler
from io_utils import get_input_output
from recipe_config_loading import check_and_get_groupby_columns, check_time_column_parameter, check_python_version, get_resampling_params

# Run the Python version check from recipe_config_loading before anything else.
check_python_version()

# --- Setup
input_dataset, output_dataset = get_input_output()
recipe_config = get_recipe_config()

# Column names of the input dataset, taken from the "name" entry of each
# schema item.
input_dataset_columns = [field["name"] for field in input_dataset.read_schema()]

# Validate the recipe settings against the available columns; the groupby
# check also returns the identifier columns to pass to the resampler.
check_time_column_parameter(recipe_config, input_dataset_columns)
groupby_columns = check_and_get_groupby_columns(recipe_config, input_dataset_columns)
datetime_column = recipe_config.get('datetime_column')
params = get_resampling_params(recipe_config)

# --- Run
df = input_dataset.get_dataframe()
resampler = Resampler(params)
output_df = resampler.transform(df, datetime_column, groupby_columns=groupby_columns)

# --- Write output
output_dataset.write_with_schema(output_df)
Ejemplo n.º 5
0
 def test_two_identifiers_fields(self, config, dataset_columns):
     """Check the result when both identifier keys are set at once.

     ``groupby_column`` is set to ``"notice"`` and ``groupby_columns`` to
     ``["country"]``; the helper is expected to return ``["country"]``.
     """
     config["groupby_column"] = "notice"
     config["groupby_columns"] = ["country"]
     expected = ["country"]
     assert check_and_get_groupby_columns(config, dataset_columns) == expected