Example #1
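# The examples below assume imports along these lines. The module paths are an
# assumption for illustration; only the imported names are taken from the code.
# configuration_data, forecast_stub, dataset_file, expected_dataset_arns,
# mock_forecast_dsg_exists and mocker are pytest fixtures provided elsewhere.
import copy
from copy import deepcopy
from datetime import datetime, timezone

import pytest
from moto import mock_sts

from shared.config import Config
from shared.status import Status
from shared.Dataset.dataset_file import DatasetFile
from shared.Dataset.dataset_type import DatasetType
from shared.Dataset.dataset_domain import DatasetDomain
from shared.Dataset.data_frequency import DataFrequency
from shared.DatasetGroup.dataset_group import DatasetGroup
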
def test_config_valid(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("some_new_key.csv", "some_bucket")
    errors = config.validate()
    assert not errors
def test_config_dependent_dataset_dependencies(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("DatasetsFromRetailDemandTRMProphet",
                               "some_bucket")
    # resolving datasets for the dependent dataset group should not raise
    datasets = config.datasets(dataset_file)
def test_config_validation_bad_dataset_reference(configuration_data):
    config = Config()
    config.config = configuration_data

    configuration_data["InvalidReference"] = {
        "DatasetGroup": {
            "Domain": "Retail"
        },
        "Datasets": {
            "From": "DoesNotExist"
        },
        "Predictor": {
            "PerformAutoML": True,
            "ForecastHorizon": 30,
            "FeaturizationConfig": {
                "ForecastFrequency": "D"
            },
        },
        "Forecast": {
            "ForecastTypes": ["0.50"]
        },
    }

    errors = config.validate()
    assert len(errors) == 1
    assert "no config found for datasets in that group" in errors[0]
Example #4
def test_config_required_datasets(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("some_new_key.csv", "some_bucket")

    assert config.required_datasets(dataset_file) == ["TARGET_TIME_SERIES"]
def test_forecast_arn(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    forecast = config.forecast(dataset_file, "RetailDemandTNPTS")

    forecast.cli = forecast_stub.client
    forecast_stub.add_response(
        "list_forecasts",
        {
            "Forecasts": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "ForecastArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "ForecastArn": "arn:2017-1-1",
                },
            ]
        },
    )

    assert forecast.arn == "arn:2017-1-1"
def test_forecast_history(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    forecast = config.forecast(dataset_file, "RetailDemandTNPTS")

    forecast.cli = forecast_stub.client
    forecast_stub.add_response(
        "list_forecasts",
        {
            "Forecasts": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "ForecastArn": "arn:2015-1-1",
                    "Status": "ACTIVE",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "ForecastArn": "arn:2017-1-1",
                    "Status": "CREATE_IN_PROGRESS",
                },
            ]
        },
    )

    history = forecast.history()
    assert history[0].get("LastModificationTime") == datetime(2017, 1, 1)
    assert history[1].get("LastModificationTime") == datetime(2015, 1, 1)
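
# The forecast_stub fixture used above is not shown in these examples. Its
# add_response / add_client_error / assert_no_pending_responses calls match
# botocore's Stubber API, so a minimal sketch could look like the following
# (an assumption; the real fixture may differ, e.g. in how credentials and
# region are supplied).
import boto3
from botocore.stub import Stubber


@pytest.fixture
def forecast_stub():
    client = boto3.client(
        "forecast",
        region_name="us-east-1",
        aws_access_key_id="testing",
        aws_secret_access_key="testing",
    )
    with Stubber(client) as stubber:
        # tests reach the wrapped boto3 client through stubber.client
        yield stubber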
Example #7
def test_dataset_default(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("some_new_key.csv", "some_bucket")

    ds = config.dataset(dataset_file)
    assert ds.data_frequency == "D"
    assert ds.dataset_type == DatasetType.TARGET_TIME_SERIES
    assert ds.dataset_domain == DatasetDomain.RETAIL
    assert ds.dataset_name == "some_new_key"
    assert ds.dataset_schema == {
        "Attributes": [
            {
                "AttributeName": "item_id",
                "AttributeType": "string"
            },
            {
                "AttributeName": "timestamp",
                "AttributeType": "timestamp",
            },
            {
                "AttributeName": "demand",
                "AttributeType": "float"
            },
        ]
    }
def test_status_still_good(forecast_stub, configuration_data, expected_dataset_arns):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file)

    predictor.cli = forecast_stub.client
    forecast_stub.add_response(
        "list_predictors",
        {
            "Predictors": [
                {"PredictorArn": "arn:", "CreationTime": datetime.now(timezone.utc)}
            ]
        },
    )
    forecast_stub.add_response(
        "describe_dataset_group", {"DatasetArns": expected_dataset_arns}
    )
    for arn in expected_dataset_arns:
        forecast_stub.add_response(
            "describe_dataset", {"Status": "ACTIVE", "DatasetArn": arn}
        )
    forecast_stub.add_response(
        "describe_predictor",
        {"CreationTime": datetime.now(timezone.utc), "Status": "ACTIVE"},
    )

    assert predictor.status == Status.ACTIVE
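
# expected_dataset_arns is another fixture provided elsewhere in the suite; a
# minimal, hypothetical stand-in is simply a list of dataset ARNs for the group
# under test (the exact values below are illustrative).
@pytest.fixture
def expected_dataset_arns():
    return [
        "arn:aws:forecast:us-east-1:abcdefghijkl:dataset/RetailDemandTNPTS",
    ]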
Example #9
def test_dataset_import_job_arn(configuration_data, forecast_stub, mocker):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset_import_job = config.dataset_import_job(dataset_file)

    # create some job history
    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "DatasetImportJobArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime":
                    datetime(2017, 1, 1),
                    "DatasetImportJobArn":
                    "arn:aws:forecast:abcdefghijkl:us-east-1:dataset-import-job/RetailDemandTRM/RetailDemandTRM_2017_01_01_00_00_00",
                },
                {
                    "LastModificationTime": datetime(2016, 1, 1),
                    "DatasetImportJobArn": "arn:2016-1-1",
                },
            ]
        },
    )

    dataset_import_job.cli = forecast_stub.client
    assert (
        dataset_import_job.arn ==
        f"arn:aws:forecast:abcdefghijkl:us-east-1:dataset-import-job/RetailDemandTRM/RetailDemandTRM_2017_01_01_00_00_00"
    )
Example #10
def test_predictor_arn(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file, "RetailDemandTNPTS")

    predictor.cli = forecast_stub.client
    forecast_stub.add_response(
        "list_predictors",
        {
            "Predictors": [
                {
                    "CreationTime": datetime(2015, 1, 1),
                    "PredictorArn": "arn:2015-1-1"
                },
                {
                    "CreationTime": datetime(2017, 1, 1),
                    "PredictorArn": "arn:2017-1-1"
                },
            ]
        },
    )

    assert predictor.arn == "arn:2017-1-1"
Example #11
@pytest.fixture
def mocked_dsg(dataset_file, configuration_data, mocker):
    # mock_sts (from moto) ensures no real STS calls are made
    with mock_sts():
        config = Config()
        config.config = configuration_data

        dataset_group = config.dataset_group(dataset_file)
        dsg = DatasetGroup(
            dataset_group_name=dataset_group.dataset_group_name,
            dataset_domain=dataset_group.dataset_group_domain,
        )

        dsg.cli = mocker.MagicMock()
        dsg.cli.describe_dataset_group.return_value = {
            "DatasetArns": ["arn::1", "arn::2", "arn::3"]
        }
        dsg.cli.describe_dataset.return_value = {
            "DatasetArn": "arn::1",
            "Status": "ACTIVE",
            "LastModificationTime": datetime.now(),
        }

        dsg.cli.get_paginator().paginate.return_value = [{
            "DatasetImportJobs": [
                {
                    "DatasetImportJobArn": f"arn::{i}",
                    "Status": "ACTIVE",
                    "LastModificationTime": datetime.now()
                }
                for i in range(3)
            ]
        }]

        yield dsg
Example #12
def test_predictor_history(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file, "RetailDemandTNPTS")

    predictor.cli = forecast_stub.client
    forecast_stub.add_response(
        "list_predictors",
        {
            "Predictors": [
                {
                    "CreationTime": datetime(2015, 1, 1),
                    "PredictorArn": "arn:2015-1-1",
                    "Status": "ACTIVE",
                },
                {
                    "CreationTime": datetime(2017, 1, 1),
                    "PredictorArn": "arn:2017-1-1",
                    "Status": "CREATE_IN_PROGRESS",
                },
            ]
        },
    )

    history = predictor.history()
    assert history[0].get("CreationTime") == datetime(2017, 1, 1)
    assert history[1].get("CreationTime") == datetime(2015, 1, 1)
def test_config_dataset_groups(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRMProphet", "some_bucket")
    dsgs = config.dataset_groups(dataset_file)

    assert len(dsgs) == 2
Example #14
def test_config_validation_doesnt_mutate_config(configuration_data):
    config = Config()
    config.config = configuration_data

    config_copy = copy.deepcopy(configuration_data)
    config.validate()

    assert config.config == config_copy
Example #15
def test_config_required_datasets_override(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("Override.csv", "some_bucket")
    required_datasets = config.required_datasets(dataset_file)
    assert "TARGET_TIME_SERIES" in required_datasets
    assert "RELATED_TIME_SERIES" in required_datasets
    assert "ITEM_METADATA" in required_datasets
def test_config_dependent_dataset_groups(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRMProphet", "some_bucket")

    dependents = config.dependent_dataset_groups(dataset_file)
    assert len(dependents) == 2
    assert "DatasetsFromRetailDemandTRMProphet" in dependents
Example #17
def test_dataset_group_mismatch(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("Mismatch.csv", "some_bucket")
    with pytest.raises(ValueError) as excinfo:
        config.dataset_group(dataset_file)

    assert "must match" in str(excinfo.value)
Example #18
def test_duplicate_timeseries(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandDuplicateDatasets.csv",
                               "some_bucket")
    with pytest.raises(ValueError) as excinfo:
        config.required_datasets(dataset_file)

    assert "duplicate dataset types" in str(excinfo.value)
def test_config_predictor_from_dependent(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRMProphet", "some_bucket")

    predictor = config.predictor(dataset_file,
                                 "DatasetsFromRetailDemandTRMProphet")
    assert (predictor.validator.expected_params["AlgorithmArn"] ==
            "arn:aws:forecast:::algorithm/CNN-QR")
Example #20
def test_missing_timeseries(configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandForgottenDatasets.csv",
                               "some_bucket")
    with pytest.raises(ValueError) as excinfo:
        config.required_datasets(dataset_file)

    assert "you must configure a TARGET_TIME_SERIES dataset" in str(
        excinfo.value)
Example #21
def test_create(dataset_file, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_group = config.dataset_group(dataset_file)
    dsg = DatasetGroup(
        dataset_group_name=dataset_group.dataset_group_name,
        dataset_domain=dataset_group.dataset_group_domain,
    )

    assert dsg.arn == "arn:aws:forecast:us-east-1:abcdefghijkl:dataset-group/data"
def test_status_not_yet_created(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    forecast = config.forecast(dataset_file, "RetailDemandTNPTS")

    forecast.cli = forecast_stub.client
    forecast_stub.add_response("list_forecasts", {"Forecasts": []})

    assert forecast.status == Status.DOES_NOT_EXIST
    forecast_stub.assert_no_pending_responses()
def test_init_forecast(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    forecast = config.forecast(dataset_file, "RetailDemandTNPTS")
    dataset_group = config.dataset_group(dataset_file)

    assert (
        forecast._dataset_group.dataset_group_name == dataset_group.dataset_group_name
    )
    assert forecast._forecast_config == config.config_item(dataset_file, "Forecast")
Example #24
def test_status(dataset_file, configuration_data, mock_forecast_dsg_exists):
    config = Config()
    config.config = configuration_data

    dataset_group = config.dataset_group(dataset_file)
    dsg = DatasetGroup(
        dataset_group_name=dataset_group.dataset_group_name,
        dataset_domain=dataset_group.dataset_group_domain,
    )

    dsg.cli = mock_forecast_dsg_exists
    assert dsg.status == Status.ACTIVE
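
# mock_forecast_dsg_exists is likewise provided elsewhere; a hypothetical
# stand-in, following the MagicMock pattern from mocked_dsg above, is a client
# whose describe_dataset_group reports an ACTIVE dataset group.
@pytest.fixture
def mock_forecast_dsg_exists(mocker):
    cli = mocker.MagicMock()
    cli.describe_dataset_group.return_value = {
        "DatasetGroupName": "data",
        "DatasetGroupArn": "arn:aws:forecast:us-east-1:abcdefghijkl:dataset-group/data",
        "Status": "ACTIVE",
    }
    return cli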
Example #25
def test_init_predictor(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file, "RetailDemandTNPTS")

    predictor.cli = forecast_stub.client

    assert predictor._dataset_file == dataset_file
    for k, v in config.config_item(dataset_file, "Predictor").items():
        if k != "MaxAge":
            assert predictor._predictor_params.get(k) == v
Example #26
def test_dataset_import_timestamp_format_none(configuration_data,
                                              forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)

    forecast_stub.add_response("list_dataset_import_jobs",
                               {"DatasetImportJobs": []})
    dataset.cli = forecast_stub.client

    assert dataset.timestamp_format is None
def test_dataset_status_lifecycle(configuration_data, forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)

    forecast_stub.add_client_error("describe_dataset", "ResourceNotFoundException")
    forecast_stub.add_response("describe_dataset", {"Status": "ACTIVE"})

    dataset.cli = forecast_stub.client

    assert dataset.status == Status.DOES_NOT_EXIST
    assert dataset.status == "ACTIVE"
def test_can_update(forecast_stub, configuration_data, expected_dataset_arns):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file)

    predictor.cli = forecast_stub.client
    forecast_stub.add_response(
        "describe_dataset_group", {"DatasetArns": expected_dataset_arns}
    )
    for arn in expected_dataset_arns:
        forecast_stub.add_response("describe_dataset", {"Status": "ACTIVE"})

    assert predictor.can_update
Example #29
def test_dataset_create_noop_errors(configuration_data, forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)
    configured_dataset = configuration_data.get("RetailDemandTRM").get(
        "Datasets")[2]

    params = {
        "DatasetType": configured_dataset.get("DatasetType"),
        "DatasetName": "RetailDemandTRM",
        "Domain": configured_dataset.get("Domain"),
        "Schema": configured_dataset.get("Schema"),
        "DataFrequency": configured_dataset.get("DataFrequency"),
    }
    create_params = deepcopy(params)
    create_params["Tags"] = [{"Key": "SolutionId", "Value": "SOL0123"}]

    forecast_stub.add_response(
        "describe_dataset",
        params,
    )

    forecast_stub.add_response("create_dataset", {"DatasetArn": dataset.arn},
                               create_params)

    forecast_stub.add_response(
        "describe_dataset",
        params,
    )

    dataset.cli = forecast_stub.client
    dataset.create()

    # clobber the values to trigger some exceptions
    # this is likely caused by a user changing the configuration unexpectedly
    dataset._dataset_type = DatasetType.RELATED_TIME_SERIES
    dataset._dataset_domain = DatasetDomain.WORK_FORCE
    dataset._data_frequency = DataFrequency("1min")
    dataset._dataset_schema = {}
    with pytest.raises(ValueError) as excinfo:
        dataset.create()

    assert "dataset type" in str(excinfo.value)
    assert "dataset domain" in str(excinfo.value)
    assert "data frequency" in str(excinfo.value)
    assert "dataset schema" in str(excinfo.value)
Example #30
def test_dataset_import_job_default(configuration_data, forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("some_new_key.csv", "some_bucket")

    dsij = config.dataset_import_job(dataset_file)
    assert (dsij.dataset_arn ==
            "arn:aws:forecast:us-east-1:abcdefghijkl:dataset/some_new_key")

    # the stubber must be set up because resolving the ARN requires dataset import job history
    dsij.cli = forecast_stub.client
    forecast_stub.add_response(method="list_dataset_import_jobs",
                               service_response={"DatasetImportJobs": []})

    assert not dsij.arn