def test_config_valid(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("some_new_key.csv", "some_bucket")

    errors = config.validate()
    assert not errors


def test_config_dependent_dataset_dependencies(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("DatasetsFromRetailDemandTRMProphet", "some_bucket")
    datasets = config.datasets(dataset_file)


def test_config_validation_bad_dataset_reference(configuration_data):
    config = Config()
    config.config = configuration_data
    configuration_data["InvalidReference"] = {
        "DatasetGroup": {"Domain": "Retail"},
        "Datasets": {"From": "DoesNotExist"},
        "Predictor": {
            "PerformAutoML": True,
            "ForecastHorizon": 30,
            "FeaturizationConfig": {"ForecastFrequency": "D"},
        },
        "Forecast": {"ForecastTypes": ["0.50"]},
    }

    errors = config.validate()
    assert len(errors) == 1
    assert "no config found for datasets in that group" in errors[0]


def test_config_required_datasets(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("some_new_key.csv", "some_bucket")
    assert config.required_datasets(dataset_file) == ["TARGET_TIME_SERIES"]


def test_forecast_arn(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    forecast = config.forecast(dataset_file, "RetailDemandTNPTS")
    forecast.cli = forecast_stub.client

    forecast_stub.add_response(
        "list_forecasts",
        {
            "Forecasts": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "ForecastArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "ForecastArn": "arn:2017-1-1",
                },
            ]
        },
    )

    assert forecast.arn == "arn:2017-1-1"


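# The tests in this module that take `forecast_stub` rely on a fixture defined
# outside this section (most likely in conftest.py). A minimal sketch of that
# fixture, assuming it wraps a botocore Stubber around an Amazon Forecast
# client (the region is illustrative), is shown commented out below. Stubber
# keeps the wrapped client on its `.client` attribute, which is what the tests
# assign to `forecast.cli` / `predictor.cli`:
#
#   import boto3
#   import pytest
#   from botocore.stub import Stubber
#
#   @pytest.fixture
#   def forecast_stub():
#       client = boto3.client("forecast", region_name="us-east-1")
#       with Stubber(client) as stubber:
#           yield stubber

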
def test_forecast_history(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    forecast = config.forecast(dataset_file, "RetailDemandTNPTS")
    forecast.cli = forecast_stub.client

    forecast_stub.add_response(
        "list_forecasts",
        {
            "Forecasts": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "ForecastArn": "arn:2015-1-1",
                    "Status": "ACTIVE",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "ForecastArn": "arn:2017-1-1",
                    "Status": "CREATE_IN_PROGRESS",
                },
            ]
        },
    )

    history = forecast.history()
    assert history[0].get("LastModificationTime") == datetime(2017, 1, 1)
    assert history[1].get("LastModificationTime") == datetime(2015, 1, 1)


def test_dataset_default(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("some_new_key.csv", "some_bucket")
    ds = config.dataset(dataset_file)

    assert ds.data_frequency == "D"
    assert ds.dataset_type == DatasetType.TARGET_TIME_SERIES
    assert ds.dataset_domain == DatasetDomain.RETAIL
    assert ds.dataset_name == "some_new_key"
    assert ds.dataset_schema == {
        "Attributes": [
            {"AttributeName": "item_id", "AttributeType": "string"},
            {"AttributeName": "timestamp", "AttributeType": "timestamp"},
            {"AttributeName": "demand", "AttributeType": "float"},
        ]
    }


def test_status_still_good(forecast_stub, configuration_data, expected_dataset_arns):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file)
    predictor.cli = forecast_stub.client

    forecast_stub.add_response(
        "list_predictors",
        {
            "Predictors": [
                {"PredictorArn": "arn:", "CreationTime": datetime.now(timezone.utc)}
            ]
        },
    )
    forecast_stub.add_response(
        "describe_dataset_group", {"DatasetArns": expected_dataset_arns}
    )
    for arn in expected_dataset_arns:
        forecast_stub.add_response(
            "describe_dataset", {"Status": "ACTIVE", "DatasetArn": arn}
        )
    forecast_stub.add_response(
        "describe_predictor",
        {"CreationTime": datetime.now(timezone.utc), "Status": "ACTIVE"},
    )

    assert predictor.status == Status.ACTIVE


def test_dataset_import_job_arn(configuration_data, forecast_stub, mocker):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset_import_job = config.dataset_import_job(dataset_file)

    # create some job history
    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "DatasetImportJobArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "DatasetImportJobArn": "arn:aws:forecast:abcdefghijkl:us-east-1:dataset-import-job/RetailDemandTRM/RetailDemandTRM_2017_01_01_00_00_00",
                },
                {
                    "LastModificationTime": datetime(2016, 1, 1),
                    "DatasetImportJobArn": "arn:2016-1-1",
                },
            ]
        },
    )

    dataset_import_job.cli = forecast_stub.client
    assert (
        dataset_import_job.arn
        == "arn:aws:forecast:abcdefghijkl:us-east-1:dataset-import-job/RetailDemandTRM/RetailDemandTRM_2017_01_01_00_00_00"
    )


def test_predictor_arn(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file, "RetailDemandTNPTS")
    predictor.cli = forecast_stub.client

    forecast_stub.add_response(
        "list_predictors",
        {
            "Predictors": [
                {"CreationTime": datetime(2015, 1, 1), "PredictorArn": "arn:2015-1-1"},
                {"CreationTime": datetime(2017, 1, 1), "PredictorArn": "arn:2017-1-1"},
            ]
        },
    )

    assert predictor.arn == "arn:2017-1-1"


def mocked_dsg(dataset_file, configuration_data, mocker):
    with mock_sts():
        config = Config()
        config.config = configuration_data

        dataset_group = config.dataset_group(dataset_file)
        dsg = DatasetGroup(
            dataset_group_name=dataset_group.dataset_group_name,
            dataset_domain=dataset_group.dataset_group_domain,
        )

        dsg.cli = mocker.MagicMock()
        dsg.cli.describe_dataset_group.return_value = {
            "DatasetArns": ["arn::1", "arn::2", "arn::3"]
        }
        dsg.cli.describe_dataset.return_value = {
            "DatasetArn": "arn::1",
            "Status": "ACTIVE",
            "LastModificationTime": datetime.now(),
        }
        dsg.cli.get_paginator().paginate.return_value = [
            {
                "DatasetImportJobs": [
                    {
                        "DatasetImportJobArn": f"arn::{i}",
                        "Status": "ACTIVE",
                        "LastModificationTime": datetime.now(),
                    }
                    for i in range(3)
                ]
            }
        ]

        yield dsg


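# `mocked_dsg` yields its DatasetGroup, so it appears intended to be a pytest
# fixture (any @pytest.fixture decorator is not shown in this section). A
# hypothetical consumer, assuming the fixture is registered, could exercise
# the MagicMock-backed client like this:
#
#   def test_mocked_dsg_datasets(mocked_dsg):
#       # describe_dataset_group is mocked to return three dataset ARNs
#       response = mocked_dsg.cli.describe_dataset_group()
#       assert response["DatasetArns"] == ["arn::1", "arn::2", "arn::3"]

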
def test_predictor_history(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file, "RetailDemandTNPTS")
    predictor.cli = forecast_stub.client

    forecast_stub.add_response(
        "list_predictors",
        {
            "Predictors": [
                {
                    "CreationTime": datetime(2015, 1, 1),
                    "PredictorArn": "arn:2015-1-1",
                    "Status": "ACTIVE",
                },
                {
                    "CreationTime": datetime(2017, 1, 1),
                    "PredictorArn": "arn:2017-1-1",
                    "Status": "CREATE_IN_PROGRESS",
                },
            ]
        },
    )

    history = predictor.history()
    assert history[0].get("CreationTime") == datetime(2017, 1, 1)
    assert history[1].get("CreationTime") == datetime(2015, 1, 1)


def test_config_dataset_groups(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRMProphet", "some_bucket")
    dsgs = config.dataset_groups(dataset_file)
    assert len(dsgs) == 2


def test_config_validation_doesnt_mutate_config(configuration_data):
    config = Config()
    config.config = configuration_data

    config_copy = copy.deepcopy(configuration_data)
    config.validate()
    assert config.config == config_copy


def test_config_required_datasets_override(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("Override.csv", "some_bucket")
    required_datasets = config.required_datasets(dataset_file)
    assert "TARGET_TIME_SERIES" in required_datasets
    assert "RELATED_TIME_SERIES" in required_datasets
    assert "ITEM_METADATA" in required_datasets


def test_config_dependent_dataset_groups(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRMProphet", "some_bucket")
    dependents = config.dependent_dataset_groups(dataset_file)
    assert len(dependents) == 2
    assert "DatasetsFromRetailDemandTRMProphet" in dependents


def test_dataset_group_mismatch(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("Mismatch.csv", "some_bucket")
    with pytest.raises(ValueError) as excinfo:
        config.dataset_group(dataset_file)

    assert "must match" in str(excinfo.value)


def test_duplicate_timeseries(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandDuplicateDatasets.csv", "some_bucket")
    with pytest.raises(ValueError) as excinfo:
        config.required_datasets(dataset_file)

    assert "duplicate dataset types" in str(excinfo.value)


def test_config_predictor_from_dependent(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRMProphet", "some_bucket")
    predictor = config.predictor(dataset_file, "DatasetsFromRetailDemandTRMProphet")
    assert (
        predictor.validator.expected_params["AlgorithmArn"]
        == "arn:aws:forecast:::algorithm/CNN-QR"
    )


def test_missing_timeseries(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandForgottenDatasets.csv", "some_bucket")
    with pytest.raises(ValueError) as excinfo:
        config.required_datasets(dataset_file)

    assert "you must configure a TARGET_TIME_SERIES dataset" in str(excinfo.value)


def test_create(dataset_file, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_group = config.dataset_group(dataset_file)
    dsg = DatasetGroup(
        dataset_group_name=dataset_group.dataset_group_name,
        dataset_domain=dataset_group.dataset_group_domain,
    )
    assert dsg.arn == "arn:aws:forecast:us-east-1:abcdefghijkl:dataset-group/data"


def test_status_not_yet_created(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    forecast = config.forecast(dataset_file, "RetailDemandTNPTS")
    forecast.cli = forecast_stub.client

    forecast_stub.add_response("list_forecasts", {"Forecasts": []})

    assert forecast.status == Status.DOES_NOT_EXIST
    forecast_stub.assert_no_pending_responses()


def test_init_forecast(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    forecast = config.forecast(dataset_file, "RetailDemandTNPTS")
    dataset_group = config.dataset_group(dataset_file)

    assert (
        forecast._dataset_group.dataset_group_name == dataset_group.dataset_group_name
    )
    assert forecast._forecast_config == config.config_item(dataset_file, "Forecast")


def test_status(dataset_file, configuration_data, mock_forecast_dsg_exists):
    config = Config()
    config.config = configuration_data

    dataset_group = config.dataset_group(dataset_file)
    dsg = DatasetGroup(
        dataset_group_name=dataset_group.dataset_group_name,
        dataset_domain=dataset_group.dataset_group_domain,
    )
    dsg.cli = mock_forecast_dsg_exists

    assert dsg.status == Status.ACTIVE


def test_init_predictor(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file, "RetailDemandTNPTS")
    predictor.cli = forecast_stub.client

    assert predictor._dataset_file == dataset_file
    for k, v in config.config_item(dataset_file, "Predictor").items():
        if k != "MaxAge":
            assert predictor._predictor_params.get(k) == v


def test_dataset_import_timestamp_format_none(configuration_data, forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)

    forecast_stub.add_response("list_dataset_import_jobs", {"DatasetImportJobs": []})

    dataset.cli = forecast_stub.client
    assert dataset.timestamp_format is None


def test_dataset_status_lifecycle(configuration_data, forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)

    forecast_stub.add_client_error("describe_dataset", "ResourceNotFoundException")
    forecast_stub.add_response("describe_dataset", {"Status": "ACTIVE"})

    dataset.cli = forecast_stub.client
    assert dataset.status == Status.DOES_NOT_EXIST
    assert dataset.status == "ACTIVE"


def test_can_update(forecast_stub, configuration_data, expected_dataset_arns):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file)
    predictor.cli = forecast_stub.client

    forecast_stub.add_response(
        "describe_dataset_group", {"DatasetArns": expected_dataset_arns}
    )
    for arn in expected_dataset_arns:
        forecast_stub.add_response("describe_dataset", {"Status": "ACTIVE"})

    assert predictor.can_update


def test_dataset_create_noop_errors(configuration_data, forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)

    configured_dataset = configuration_data.get("RetailDemandTRM").get("Datasets")[2]
    params = {
        "DatasetType": configured_dataset.get("DatasetType"),
        "DatasetName": "RetailDemandTRM",
        "Domain": configured_dataset.get("Domain"),
        "Schema": configured_dataset.get("Schema"),
        "DataFrequency": configured_dataset.get("DataFrequency"),
    }
    create_params = deepcopy(params)
    create_params["Tags"] = [{"Key": "SolutionId", "Value": "SOL0123"}]

    forecast_stub.add_response("describe_dataset", params)
    forecast_stub.add_response(
        "create_dataset", {"DatasetArn": dataset.arn}, create_params
    )
    forecast_stub.add_response("describe_dataset", params)

    dataset.cli = forecast_stub.client
    dataset.create()

    # clobber the values to trigger some exceptions;
    # this is likely caused by a user changing configuration unexpectedly
    dataset._dataset_type = DatasetType.RELATED_TIME_SERIES
    dataset._dataset_domain = DatasetDomain.WORK_FORCE
    dataset._data_frequency = DataFrequency("1min")
    dataset._dataset_schema = {}

    with pytest.raises(ValueError) as excinfo:
        dataset.create()

    assert "dataset type" in str(excinfo.value)
    assert "dataset domain" in str(excinfo.value)
    assert "data frequency" in str(excinfo.value)
    assert "dataset schema" in str(excinfo.value)


def test_dataset_import_job_default(configuration_data, forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("some_new_key.csv", "some_bucket")
    dsij = config.dataset_import_job(dataset_file)
    assert (
        dsij.dataset_arn
        == "arn:aws:forecast:us-east-1:abcdefghijkl:dataset/some_new_key"
    )

    # the stubber needs to be initialized because the ARN needs dataset import job history
    dsij.cli = forecast_stub.client
    forecast_stub.add_response(
        method="list_dataset_import_jobs",
        service_response={"DatasetImportJobs": []},
    )

    assert not dsij.arn
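

# Several other fixtures used above (`configuration_data`, `dataset_file`,
# `expected_dataset_arns`, `mock_forecast_dsg_exists`) are also defined
# outside this section, most likely in conftest.py. A minimal sketch of the
# simpler ones is shown commented out below; the `dataset_file` value is an
# assumption chosen to be consistent with the `dataset-group/data` assertion
# in test_create, and the ARN list contents are purely illustrative:
#
#   import pytest
#
#   @pytest.fixture
#   def dataset_file():
#       return DatasetFile("data.csv", "some_bucket")
#
#   @pytest.fixture
#   def expected_dataset_arns():
#       return ["arn:aws:forecast:us-east-1:abcdefghijkl:dataset/data"]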