def test_status_not_yet_created(
    forecast_stub, configuration_data, expected_dataset_arns
):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file)

    predictor.cli = forecast_stub.client
    forecast_stub.add_response("list_predictors", {"Predictors": []})
    forecast_stub.add_response(
        "describe_dataset_group", {"DatasetArns": expected_dataset_arns}
    )
    for arn in expected_dataset_arns:
        forecast_stub.add_response(
            "describe_dataset", {"Status": "ACTIVE", "DatasetArn": arn}
        )

    assert predictor.status == Status.DOES_NOT_EXIST
    forecast_stub.assert_no_pending_responses()
def notification(event: dict, context):
    """Handles an S3 Event Notification (for any .csv file written to any key under train/*)

    :param dict event: AWS Lambda Event (in this case, an S3 Event message)
    :param context: The AWS Lambda Context object
    :return: None
    """

    # Get the event data, then read the default config file
    evt = Event(event)
    s3_config = None

    # Build the input to the state machine
    state_input = {"bucket": evt.bucket, "dataset_file": evt.key}
    logger.info("Triggered by s3 notification on bucket %s, key %s" %
                (evt.bucket, evt.key))
    try:
        s3_config = Config.from_s3(evt.bucket)
        state_input["config"] = s3_config.config
    except ConfigNotFound as excinfo:
        logger.warning("The configuration file was not found")
        state_input["serviceError"] = {
            "Error": "ConfigNotFound",
            "Cause": json.dumps({"errorMessage": str(excinfo)}),
        }
    except ValueError as excinfo:
        logger.warning("There was a problem with the config file: %s" %
                       str(excinfo))
        state_input["serviceError"] = {
            "Error": "ValueError",
            "Cause": json.dumps({"errorMessage": str(excinfo)}),
        }

    # validate the config file if it loaded properly
    if s3_config:
        errors = s3_config.validate()
        if errors:
            for error in errors:
                logger.warning("config problem: %s" % error)

            state_input["serviceError"] = {
                "Error": "ConfigError",
                "Cause": json.dumps({"errorMessage": "\n".join(errors)}),
            }

    # Start the AWS Step Function automation of Amazon Forecast
    sfn = get_sfn_client()
    sfn.start_execution(
        stateMachineArn=environ.get("STEP_FUNCTIONS_ARN"),
        name=evt.event_id,
        input=json.dumps(state_input),
    )
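# A minimal sketch of the S3 event notification this handler receives (bucket
# name and object key are hypothetical values); the Event wrapper is assumed
# to pull the bucket, key, and an event id out of this structure.
sample_s3_event = {
    "Records": [
        {
            "eventSource": "aws:s3",
            "s3": {
                "bucket": {"name": "some_bucket"},
                "object": {"key": "train/RetailDemandTRM.csv"},
            },
        }
    ]
}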
def test_notification(mock_config, s3_event, mocker):
    # an invalid configuration - default without any of the required keys
    config = Config()
    config.config = {"my": {"name": "bob"}}

    # simulate invalid config
    config_mock = mocker.MagicMock()
    config_mock.from_s3.return_value = config
    mocker.patch("lambdas.notification.handler.Config", config_mock)

    mock_config = mock_config.return_value
    mock_config.from_s3.return_value = Config()

    client_mock = mocker.MagicMock()
    mocker.patch("lambdas.notification.handler.get_sfn_client", client_mock)

    handler.notification(s3_event, None)

    args, kwargs = client_mock().start_execution.call_args
    assert kwargs.get("stateMachineArn") == os.getenv("STATE_MACHINE_ARN")
    assert "test-key_target_time_series_" in kwargs.get("name")
    assert "input" in kwargs.keys()
Example #4
def test_dataset_import_timestamp_format(configuration_data, forecast_stub,
                                         format):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)

    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [{
                "DatasetImportJobArn": "arn:something",
                "LastModificationTime": datetime(2015, 1, 1),
            }]
        },
    )
    forecast_stub.add_response("describe_dataset_import_job",
                               {"TimestampFormat": format})
    dataset.cli = forecast_stub.client

    assert dataset.timestamp_format == format
Example #5
def etl_forecast_trm(sfn_configuration_data, s3_valid_files):
    """This represents a single file upload"""
    config = Config.from_sfn(sfn_configuration_data)

    with mock_sts():
        dataset_file = DatasetFile(key="train/RetailDemandTRM.csv", bucket="testbucket")
        forecast = config.forecast(dataset_file, "RetailDemandTRM")
        yield ForecastETL(
            workgroup="primary",
            schema="default",
            config=config,
            dataset_file=dataset_file,
            forecast=forecast,
        )
def test_dataset_create_noop_errors(configuration_data, forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)
    configured_dataset = configuration_data.get("RetailDemandTRM").get("Datasets")[2]

    for i in range(0, 2):
        forecast_stub.add_response(
            "describe_dataset",
            {
                "DatasetType": configured_dataset.get("DatasetType"),
                "DatasetName": "RetailDemandTRM",
                "Domain": configured_dataset.get("Domain"),
                "Schema": configured_dataset.get("Schema"),
                "DataFrequency": configured_dataset.get("DataFrequency"),
            },
        )

    # should not call anything
    dataset.cli = forecast_stub.client
    dataset.create()

    # clobber the values to trigger some exceptions
    # this is likely caused by a user changing configuration unexpectedly
    dataset._dataset_type = DatasetType.RELATED_TIME_SERIES
    dataset._dataset_domain = DatasetDomain.WORK_FORCE
    dataset._data_frequency = DataFrequency("1min")
    dataset._dataset_schema = {}
    with pytest.raises(ValueError) as excinfo:
        dataset.create()

    assert "dataset type" in str(excinfo.value)
    assert "dataset domain" in str(excinfo.value)
    assert "data frequency" in str(excinfo.value)
    assert "dataset schema" in str(excinfo.value)
def createpredictor(event, context) -> (Status, str):
    """
    Create/monitor Amazon Forecast predictor creation
    :param event: lambda event
    :param context: lambda context
    :return: predictor status and dataset ARN
    """
    config = Config.from_sfn(event)
    dataset_file = DatasetFile(event.get("dataset_file"), event.get("bucket"))

    predictor = config.predictor(dataset_file)
    if predictor.status == Status.DOES_NOT_EXIST:
        predictor.create()

    return predictor.status, predictor.arn
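def example_createpredictor_invocation():
    # Hedged usage sketch (not part of the solution): the Step Functions state
    # machine is assumed to pass an event shaped like the state_input built by
    # the notification handler; the values below are hypothetical.
    sample_event = {
        "bucket": "some_bucket",
        "dataset_file": "train/RetailDemandTRM.csv",
        "config": {},  # the parsed configuration (contents omitted here)
    }
    return createpredictor(sample_event, None)  # -> (Status, predictor ARN)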
def createdatasetimportjob(event, context) -> (Status, str):
    """
    Create/monitor Amazon Forecast dataset import job creation
    :param event: lambda event
    :param context: lambda context
    :return: dataset import job status and dataset ARN
    """
    config = Config.from_sfn(event)
    dataset_file = DatasetFile(event.get("dataset_file"), event.get("bucket"))

    dataset_import = config.dataset_import_job(dataset_file)
    if dataset_import.status == Status.DOES_NOT_EXIST:
        dataset_import.create()

    return dataset_import.status, dataset_import.arn
Example #9
def test_dataset_imports(configuration_data, forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)

    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "DatasetImportJobArn": "arn::",
                    "DatasetImportJobName": "middle_job",
                    "LastModificationTime": datetime(2018, 1, 1),
                },
                {
                    "DatasetImportJobArn": "arn::",
                    "DatasetImportJobName": "end_job",
                    "LastModificationTime": datetime(2019, 1, 1),
                },
                {
                    "DatasetImportJobArn": "arn::",
                    "DatasetImportJobName": "early_job",
                    "LastModificationTime": datetime(2017, 1, 1),
                },
            ]
        },
    )

    dataset.cli = forecast_stub.client

    ds_imports = dataset.imports
    assert ds_imports[0].get("DatasetImportJobName") == "end_job"
    assert ds_imports[1].get("DatasetImportJobName") == "middle_job"
    assert ds_imports[2].get("DatasetImportJobName") == "early_job"
Example #10
def etl_forecast(request, sfn_configuration_data, s3_valid_files):
    """This represents all of the file uploads that might trigger a forecast"""
    config = Config.from_sfn(sfn_configuration_data)

    with mock_sts():
        dataset_file = DatasetFile(key=request.param.key, bucket="testbucket")
        forecast = config.forecast(dataset_file, request.param.key)
        yield (
            ForecastETL(
                workgroup="primary",
                schema="default",
                config=config,
                dataset_file=dataset_file,
                forecast=forecast,
            ),
            request.param,
        )
Example #11
def prepareexport(event, context):
    """
    Create consolidated export tables for forecast visualization
    :param event: lambda event
    :param context: lambda context
    :return: None
    """
    config = Config.from_sfn(event)

    dataset_file = DatasetFile(event.get("dataset_file"), event.get("bucket"))
    dataset_group_name = event.get("dataset_group_name")
    forecast = config.forecast(dataset_file, dataset_group_name)

    workgroup = environ.get("WORKGROUP_NAME")
    schema = environ.get("SCHEMA_NAME")
    principal = environ.get("QUICKSIGHT_PRINCIPAL")
    source_template = environ.get("QUICKSIGHT_SOURCE")

    etl = ForecastETL(
        workgroup=workgroup,
        schema=schema,
        config=config,
        dataset_file=dataset_file,
        forecast=forecast,
    )

    try:
        etl.create_input_tables()
        etl.consolidate_data()
    except ValueError as e:
        if "already exists" not in str(e):
            raise e
    finally:
        etl.cleanup_temp_tables()

    # attempt to create QuickSight analysis
    qs = QuickSight(
        workgroup=workgroup,
        table_name=etl.output_table_name,
        schema=schema,
        principal=principal,
        source_template=source_template,
    )
    qs.create_data_source()
    qs.create_data_set()
    qs.create_analysis()
def test_init_forecast(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    forecast = config.forecast(dataset_file, "RetailDemandTNPTS")
    dataset_group = config.dataset_group(dataset_file)

    assert (
        forecast._dataset_group.dataset_group_name == dataset_group.dataset_group_name
    )
    assert forecast._forecast_config == config.config_item(dataset_file, "Forecast")
Example #13
def createdatasetgroup(event, context) -> (Status, str):
    """
    Create/monitor Amazon Forecast dataset group creation
    :param event: lambda event
    :param context: lambda context
    :return: dataset group status and dataset group ARN
    """
    config = Config.from_sfn(event)
    dataset_file = DatasetFile(event.get("dataset_file"), event.get("bucket"))

    dataset_group = config.dataset_group(dataset_file)
    if dataset_group.status == Status.DOES_NOT_EXIST:
        dataset_group.create()

    if dataset_group.status == Status.ACTIVE:
        datasets = config.datasets(dataset_file)
        dataset_group.update(datasets)

    return dataset_group.status, dataset_group.arn
Example #14
def createforecast(event, context):
    """
    Create/monitor Amazon Forecast forecast creation
    :param event: lambda event
    :param context: lambda context
    :return: forecast / forecast export status and forecast ARN
    """
    config = Config.from_sfn(event)
    dataset_file = DatasetFile(event.get("dataset_file"), event.get("bucket"))

    forecast = config.forecast(dataset_file)
    tracked = forecast

    if forecast.status == Status.DOES_NOT_EXIST:
        # TODO: publish predictor stats to CloudWatch prior to create
        logger.info("Creating forecast for %s" % dataset_file.prefix)
        forecast.create()

    if forecast.status == Status.ACTIVE:
        logger.info("Creating forecast export for %s" % dataset_file.prefix)
        tracked = forecast.export(dataset_file)

    return tracked.status, forecast.arn
import sys
sys.path.append('../ReinforcementLearning')

import unittest
test = unittest.TestCase()

from shared.grid import Grid
from shared.policy import Policy
from shared.rewards import Rewards
from shared.actions import Actions
from shared.config import Config
from shared.statesandblockers import StatesAndBlocks

from policyevaluation import PolicyEvaluation

config = Config()
policy = Policy()
states_and_blocks = StatesAndBlocks(
    config.positive_terminal_states,
    config.negative_terminal_states,
    config.starting_states,
    config.blocking_states,
)
grid = Grid(config.numer_of_rows, config.number_of_columns, states_and_blocks)
rewards = Rewards(config.positive_reward, config.negative_reward, config.step_reward, states_and_blocks)
grid.show_grid_positions()

# testing if we get the correct state number based on position
test.assertEqual(grid.get_state_number(0,0), 0)
test.assertEqual(grid.get_state_number(0,1), 1)
test.assertEqual(grid.get_state_number(0,2), 2)
test.assertEqual(grid.get_state_number(0,3), 3)
test.assertEqual(grid.get_state_number(1,1), 5)
test.assertEqual(grid.get_state_number(1,2), 6)
test.assertEqual(grid.get_state_number(2,3), 11)
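
# From the assertions above, the grid appears to use row-major numbering on a
# four-column grid, i.e. state = row * number_of_columns + column. A minimal
# standalone sketch of that mapping (a hypothetical helper, not the Grid
# implementation itself):
def state_number(row, column, number_of_columns=4):
    return row * number_of_columns + column

test.assertEqual(state_number(1, 1), 5)
test.assertEqual(state_number(2, 3), 11)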
Example #16
def test_dataset_import_job_status_lifecycle(configuration_data, forecast_stub,
                                             mocker):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset_import_job = config.dataset_import_job(dataset_file)
    size = 40
    etag = "9d2990c88a30dac1785a09fbb46f3e10"

    # first call - doesn't exist
    forecast_stub.add_response("list_dataset_import_jobs",
                               {"DatasetImportJobs": []})
    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "DatasetImportJobArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "DatasetImportJobArn": "arn:2017-1-1",
                },
                {
                    "LastModificationTime": datetime(2016, 1, 1),
                    "DatasetImportJobArn": "arn:2016-1-1",
                },
            ]
        },
    )
    forecast_stub.add_response(
        "describe_dataset_import_job",
        {"Status": "ACTIVE"},
    )
    forecast_stub.add_response(
        "list_tags_for_resource",
        {"Tags": [{
            "Key": "SolutionETag",
            "Value": etag
        }]})
    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "DatasetImportJobArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "DatasetImportJobArn": "arn:2017-1-1",
                },
                {
                    "LastModificationTime": datetime(2016, 1, 1),
                    "DatasetImportJobArn": "arn:2016-1-1",
                },
            ]
        },
    )
    forecast_stub.add_response(
        "describe_dataset_import_job",
        {"Status": "ACTIVE"},
    )
    forecast_stub.add_response(
        "list_tags_for_resource",
        {
            "Tags": [{
                "Key": "SolutionETag",
                "Value": "9d2990c88a30dac1785a09fbb46f3e11"
            }]
        },
    )

    dataset_import_job.cli = forecast_stub.client
    mocker.patch(
        "shared.Dataset.dataset_file.DatasetFile.etag",
        new_callable=mocker.PropertyMock,
        return_value=etag,
    )

    assert dataset_import_job.status == Status.DOES_NOT_EXIST

    # simulate finding an active dataset
    assert dataset_import_job.status == Status.ACTIVE

    # simulate a new dataset (with a different etag) uploaded
    assert dataset_import_job.status == Status.DOES_NOT_EXIST
def test_dataset_import_job_status_lifecycle(configuration_data, forecast_stub, mocker):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset_import_job = config.dataset_import_job(dataset_file)
    size = 40

    # first call - doesn't exist
    forecast_stub.add_response("list_dataset_import_jobs", {"DatasetImportJobs": []})
    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "DatasetImportJobArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "DatasetImportJobArn": "arn:2017-1-1",
                },
                {
                    "LastModificationTime": datetime(2016, 1, 1),
                    "DatasetImportJobArn": "arn:2016-1-1",
                },
            ]
        },
    )
    forecast_stub.add_response(
        "describe_dataset_import_job",
        {"Status": "ACTIVE", "FieldStatistics": {"item_id": {"Count": size}}},
    )
    forecast_stub.add_response("list_tags_for_resource", {"Tags": []})
    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "DatasetImportJobArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "DatasetImportJobArn": "arn:2017-1-1",
                },
                {
                    "LastModificationTime": datetime(2016, 1, 1),
                    "DatasetImportJobArn": "arn:2016-1-1",
                },
            ]
        },
    )
    forecast_stub.add_response(
        "describe_dataset_import_job",
        {"Status": "ACTIVE", "FieldStatistics": {"item_id": {"Count": size + 1}}},
    )
    forecast_stub.add_response("list_tags_for_resource", {"Tags": []})

    dataset_import_job.cli = forecast_stub.client
    mocker.patch(
        "shared.Dataset.dataset_file.DatasetFile.size",
        new_callable=mocker.PropertyMock,
        return_value=size,
    )

    assert dataset_import_job.status == Status.DOES_NOT_EXIST

    # simulate finding an active dataset
    assert dataset_import_job.status == Status.ACTIVE

    # simulate a new dataset (with more lines) uploaded
    assert dataset_import_job.status == Status.DOES_NOT_EXIST
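
# Both lifecycle tests above check the same idea from two angles: the newest
# import job only counts as existing if it still matches the file currently in
# S3 (by the SolutionETag tag in the first test, by row count in the second).
# A minimal sketch of that decision with hypothetical helper names; this is
# not the shared library's actual implementation:
def import_job_status(jobs, tags, current_etag):
    if not jobs:
        return Status.DOES_NOT_EXIST
    tag_map = {tag["Key"]: tag["Value"] for tag in tags}
    if tag_map.get("SolutionETag") != current_etag:
        # the file in S3 changed since the last import, so a fresh import is needed
        return Status.DOES_NOT_EXIST
    return Status.ACTIVE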
Example #18
class Reader:
    mail_servers: List[MailServer] = []
    senders: List[Sender] = []
    proxies: List[Proxy] = []
    recipients: List[Recipient] = []
    config = Config()
    message = Message(ATTACHMENTS_FOLDER)

    @staticmethod
    def __create_mail_server(line: str):
        # Split and validate
        parts = line.split(":")
        parts_len = len(parts)
        assert parts_len == 4 or parts_len == 5, "%s: %s" % (INVALID_LINE,
                                                             line)

        # Create new server object
        server = MailServer()
        server.host = parts[MAIL_SERVERS_HOST_INDEX]
        server.username = parts[MAIL_SERVERS_USERNAME_INDEX]
        server.password = parts[MAIL_SERVERS_PASSWORD_INDEX]
        server.port = int(parts[MAIL_SERVERS_PORT_INDEX])

        if parts_len == 5:
            server.limit = int(parts[MAIL_SERVERS_LIMIT_INDEX])

        return server

    @staticmethod
    def __create_sender(line: str):
        # Split and validate
        parts = line.split(":")
        parts_len = len(parts)
        assert parts_len == 2, "%s: %s" % (INVALID_LINE, line)

        # Create new sender object
        sender = Sender()
        sender.email = parts[SENDER_MAIL_INDEX]
        sender.name = parts[SENDER_NAME_INDEX]

        return sender

    @staticmethod
    def __create_proxy(line: str):
        # Split and validate
        parts = line.split(":")
        parts_len = len(parts)
        assert parts_len == 2, "%s: %s" % (INVALID_LINE, line)

        # Create new proxy object
        proxy = Proxy()
        proxy.gateway = parts[PROXIES_GATEWAY_INDEX]
        proxy.port = int(parts[PROXIES_PORT_INDEX])

        return proxy

    @staticmethod
    def __create_recipient(line: str):
        # Split and validate
        parts = line.split(":")
        parts_len = len(parts)
        assert parts_len == 1, "%s: %s" % (INVALID_LINE, line)

        # Create new recipient object
        recipient = Recipient()
        recipient.email = parts[RECIPIENT_EMAIL_INDEX]

        return recipient

    @staticmethod
    def __should_ignore(line: str):
        # Ignore comments and empty lines
        return line == "\n" or line.startswith("#")

    def read(self):

        print("[ ] Loading files")

        try:
            print("\t- %s" % MAIL_SERVERS_FILE_NAME)
            for line in open("%s/%s" % (BUCKET_FOLDER, MAIL_SERVERS_FILE_NAME),
                             'r'):
                if self.__should_ignore(line):
                    continue
                server = self.__create_mail_server(line.rstrip("\n"))
                self.mail_servers.append(server)

            print("\t- %s" % SENDERS_FILE_NAME)
            for line in open("%s/%s" % (BUCKET_FOLDER, SENDERS_FILE_NAME),
                             'r'):
                if self.__should_ignore(line):
                    continue
                sender = self.__create_sender(line.rstrip("\n"))
                self.senders.append(sender)

            print("\t- %s" % PROXIES_FILE_NAME)
            for line in open("%s/%s" % (BUCKET_FOLDER, PROXIES_FILE_NAME),
                             'r'):
                if self.__should_ignore(line):
                    continue
                proxy = self.__create_proxy(line.rstrip("\n"))
                self.proxies.append(proxy)

            print("\t- %s" % RECIPIENT_FILE_NAME)
            for line in open("%s/%s" % (BUCKET_FOLDER, RECIPIENT_FILE_NAME),
                             'r'):
                if self.__should_ignore(line):
                    continue
                recipient = self.__create_recipient(line.rstrip("\n"))
                self.recipients.append(recipient)

            print("\t- %s" % CONFIGS_FILE_NAME)
            with open("%s/%s" % (BUCKET_FOLDER, CONFIGS_FILE_NAME),
                      'r') as stream:
                settings = yaml.safe_load(stream)
                self.config.treads = int(
                    settings.get("treads", self.config.treads))
                self.config.mails_per_second = int(
                    settings.get("mails_per_second",
                                 self.config.mails_per_second))
                self.message.subject = settings.get("subject",
                                                    self.message.subject)

            print("\t- %s" % HTML_FILE_NAME)
            with open("%s/%s" % (BUCKET_FOLDER, HTML_FILE_NAME),
                      'r') as stream:
                self.message.html = stream.read()

        except AssertionError as e:
            print('\n[!] %s ' % e)
            exit(1)

        except Exception as e:
            print("\n[!] %s => %s" % e)
            exit(1)

        print("\n[✓] loaded all files")
Example #19
def test_from_s3_missing_config(s3_missing_config):
    config = Config()

    with pytest.raises(ConfigNotFound) as error:
        config.from_s3(bucket="testbucket")
Example #20
def test_from_s3(s3_valid_config, configuration_data):
    config = Config.from_s3(bucket="testbucket")

    assert config.config == configuration_data
Example #21
def test_from_sfn(configuration_data):
    config = Config.from_sfn(event={"config": configuration_data})

    assert config.config == configuration_data