def test_status_not_yet_created(
    forecast_stub, configuration_data, expected_dataset_arns
):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    predictor = config.predictor(dataset_file)
    predictor.cli = forecast_stub.client

    forecast_stub.add_response("list_predictors", {"Predictors": []})
    forecast_stub.add_response(
        "describe_dataset_group", {"DatasetArns": expected_dataset_arns}
    )
    for arn in expected_dataset_arns:
        forecast_stub.add_response(
            "describe_dataset", {"Status": "ACTIVE", "DatasetArn": arn}
        )

    assert predictor.status == Status.DOES_NOT_EXIST
    forecast_stub.assert_no_pending_responses()
def notification(event: dict, context):
    """Handles an S3 Event Notification (for any .csv file written to any key under train/*)

    :param dict event: AWS Lambda Event (in this case, an S3 Event message)
    :param context: The AWS Lambda Context object
    :return: None
    """
    # Get the event data, then read the default config file
    evt = Event(event)
    s3_config = None

    # Build the input to the state machine
    state_input = {"bucket": evt.bucket, "dataset_file": evt.key}
    logger.info(
        "Triggered by s3 notification on bucket %s, key %s" % (evt.bucket, evt.key)
    )

    try:
        s3_config = Config.from_s3(evt.bucket)
        state_input["config"] = s3_config.config
    except ConfigNotFound as excinfo:
        logger.warning("The configuration file was not found")
        state_input["serviceError"] = {
            "Error": "ConfigNotFound",
            "Cause": json.dumps({"errorMessage": str(excinfo)}),
        }
    except ValueError as excinfo:
        logger.warning("There was a problem with the config file: %s" % str(excinfo))
        state_input["serviceError"] = {
            "Error": "ValueError",
            "Cause": json.dumps({"errorMessage": str(excinfo)}),
        }

    # Validate the config file if it loaded properly
    if s3_config:
        errors = s3_config.validate()
        if errors:
            for error in errors:
                logger.warning("config problem: %s" % error)
            state_input["serviceError"] = {
                "Error": "ConfigError",
                "Cause": json.dumps({"errorMessage": "\n".join(errors)}),
            }

    # Start the AWS Step Function automation of Amazon Forecast
    # (the test below asserts against STATE_MACHINE_ARN, so the handler must
    # read the same environment variable)
    sfn = get_sfn_client()
    sfn.start_execution(
        stateMachineArn=environ.get("STATE_MACHINE_ARN"),
        name=evt.event_id,
        input=json.dumps(state_input),
    )
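# A minimal sketch of the `Event` wrapper the handler above relies on, assuming
# the standard S3 event notification layout (Records[0].s3.bucket.name and
# Records[0].s3.object.key). The `event_id` construction shown is hypothetical,
# chosen only to yield a unique Step Functions execution name; the solution's
# actual implementation may differ.
class Event:
    def __init__(self, event: dict):
        record = event["Records"][0]
        self.bucket = record["s3"]["bucket"]["name"]
        self.key = record["s3"]["object"]["key"]
        # Execution names must be unique per state machine; combining the key
        # with the notification's request ID is one way to achieve that.
        request_id = record.get("responseElements", {}).get("x-amz-request-id", "")
        self.event_id = "%s_%s" % (self.key.replace("/", "_"), request_id)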
def test_notification(mock_config, s3_event, mocker):
    # an invalid configuration - default without any of the required keys
    config = Config()
    config.config = {"my": {"name": "bob"}}  # simulate invalid config

    config_mock = mocker.MagicMock()
    config_mock.from_s3.return_value = config
    mocker.patch("lambdas.notification.handler.Config", config_mock)

    mock_config = mock_config.return_value
    mock_config.from_s3.return_value = Config()

    client_mock = mocker.MagicMock()
    mocker.patch("lambdas.notification.handler.get_sfn_client", client_mock)

    handler.notification(s3_event, None)

    args, kwargs = client_mock().start_execution.call_args
    assert kwargs.get("stateMachineArn") == os.getenv("STATE_MACHINE_ARN")
    assert "test-key_target_time_series_" in kwargs.get("name")
    assert "input" in kwargs.keys()
def test_dataset_import_timestamp_format(configuration_data, forecast_stub, format):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)

    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "DatasetImportJobArn": "arn:something",
                    "LastModificationTime": datetime(2015, 1, 1),
                }
            ]
        },
    )
    forecast_stub.add_response(
        "describe_dataset_import_job", {"TimestampFormat": format}
    )

    dataset.cli = forecast_stub.client
    assert dataset.timestamp_format == format
def etl_forecast_trm(sfn_configuration_data, s3_valid_files):
    """This represents a single file upload"""
    config = Config.from_sfn(sfn_configuration_data)

    with mock_sts():
        dataset_file = DatasetFile(key="train/RetailDemandTRM.csv", bucket="testbucket")
        forecast = config.forecast(dataset_file, "RetailDemandTRM")
        yield ForecastETL(
            workgroup="primary",
            schema="default",
            config=config,
            dataset_file=dataset_file,
            forecast=forecast,
        )
def test_dataset_create_noop_errors(configuration_data, forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)
    configured_dataset = configuration_data.get("RetailDemandTRM").get("Datasets")[2]

    for i in range(0, 2):
        forecast_stub.add_response(
            "describe_dataset",
            {
                "DatasetType": configured_dataset.get("DatasetType"),
                "DatasetName": "RetailDemandTRM",
                "Domain": configured_dataset.get("Domain"),
                "Schema": configured_dataset.get("Schema"),
                "DataFrequency": configured_dataset.get("DataFrequency"),
            },
        )

    # should not call anything
    dataset.cli = forecast_stub.client
    dataset.create()

    # clobber the values to trigger some exceptions;
    # this is likely caused by a user changing configuration unexpectedly
    dataset._dataset_type = DatasetType.RELATED_TIME_SERIES
    dataset._dataset_domain = DatasetDomain.WORK_FORCE
    dataset._data_frequency = DataFrequency("1min")
    dataset._dataset_schema = {}

    with pytest.raises(ValueError) as excinfo:
        dataset.create()

    assert "dataset type" in str(excinfo.value)
    assert "dataset domain" in str(excinfo.value)
    assert "data frequency" in str(excinfo.value)
    assert "dataset schema" in str(excinfo.value)
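# The test above implies that Dataset.create() is a no-op when the service-side
# dataset matches the configuration, and raises a single ValueError naming every
# mismatched field otherwise. A hedged sketch of that comparison (the attribute
# names on `configured` are illustrative, not the solution's actual API):
def check_dataset_matches(existing: dict, configured) -> None:
    mismatches = []
    if existing["DatasetType"] != configured.dataset_type.value:
        mismatches.append("dataset type")
    if existing["Domain"] != configured.dataset_domain.value:
        mismatches.append("dataset domain")
    if existing["DataFrequency"] != configured.data_frequency.frequency:
        mismatches.append("data frequency")
    if existing["Schema"] != configured.dataset_schema:
        mismatches.append("dataset schema")
    if mismatches:
        raise ValueError(
            "dataset exists but does not match its configuration: %s"
            % ", ".join(mismatches)
        )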
def createpredictor(event, context) -> (Status, str):
    """
    Create / monitor Amazon Forecast predictor creation

    :param event: lambda event
    :param context: lambda context
    :return: predictor status and predictor ARN
    """
    config = Config.from_sfn(event)
    dataset_file = DatasetFile(event.get("dataset_file"), event.get("bucket"))

    predictor = config.predictor(dataset_file)
    if predictor.status == Status.DOES_NOT_EXIST:
        predictor.create()

    return predictor.status, predictor.arn
def createdatasetimportjob(event, context) -> (Status, str):
    """
    Create / monitor Amazon Forecast dataset import job creation

    :param event: lambda event
    :param context: lambda context
    :return: dataset import job status and dataset import job ARN
    """
    config = Config.from_sfn(event)
    dataset_file = DatasetFile(event.get("dataset_file"), event.get("bucket"))

    dataset_import = config.dataset_import_job(dataset_file)
    if dataset_import.status == Status.DOES_NOT_EXIST:
        dataset_import.create()

    return dataset_import.status, dataset_import.arn
def test_dataset_imports(configuration_data, forecast_stub):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset = config.dataset(dataset_file)

    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "DatasetImportJobArn": "arn::",
                    "DatasetImportJobName": "middle_job",
                    "LastModificationTime": datetime(2018, 1, 1),
                },
                {
                    "DatasetImportJobArn": "arn::",
                    "DatasetImportJobName": "end_job",
                    "LastModificationTime": datetime(2019, 1, 1),
                },
                {
                    "DatasetImportJobArn": "arn::",
                    "DatasetImportJobName": "early_job",
                    "LastModificationTime": datetime(2017, 1, 1),
                },
            ]
        },
    )

    dataset.cli = forecast_stub.client
    ds_imports = dataset.imports

    # imports should come back sorted newest-first
    assert ds_imports[0].get("DatasetImportJobName") == "end_job"
    assert ds_imports[1].get("DatasetImportJobName") == "middle_job"
    assert ds_imports[2].get("DatasetImportJobName") == "early_job"
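# The test above pins down the ordering contract of `Dataset.imports`: jobs come
# back sorted newest-first by LastModificationTime. A minimal sketch of that
# ordering (illustrative; the real property also lists the jobs from the Amazon
# Forecast API before sorting):
def sort_imports_newest_first(import_jobs: list) -> list:
    return sorted(
        import_jobs,
        key=lambda job: job["LastModificationTime"],
        reverse=True,
    )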
def etl_forecast(request, sfn_configuration_data, s3_valid_files):
    """This represents all of the file uploads that might trigger a forecast"""
    config = Config.from_sfn(sfn_configuration_data)

    with mock_sts():
        dataset_file = DatasetFile(key=request.param.key, bucket="testbucket")
        forecast = config.forecast(dataset_file, request.param.key)
        yield (
            ForecastETL(
                workgroup="primary",
                schema="default",
                config=config,
                dataset_file=dataset_file,
                forecast=forecast,
            ),
            request.param,
        )
def prepareexport(event, context):
    """
    Create consolidated export tables for forecast visualization

    :param event: lambda event
    :param context: lambda context
    :return: None
    """
    config = Config.from_sfn(event)
    dataset_file = DatasetFile(event.get("dataset_file"), event.get("bucket"))
    dataset_group_name = event.get("dataset_group_name")
    forecast = config.forecast(dataset_file, dataset_group_name)

    workgroup = environ.get("WORKGROUP_NAME")
    schema = environ.get("SCHEMA_NAME")
    principal = environ.get("QUICKSIGHT_PRINCIPAL")
    source_template = environ.get("QUICKSIGHT_SOURCE")

    etl = ForecastETL(
        workgroup=workgroup,
        schema=schema,
        config=config,
        dataset_file=dataset_file,
        forecast=forecast,
    )

    try:
        etl.create_input_tables()
        etl.consolidate_data()
    except ValueError as e:
        if "already exists" not in str(e):
            raise e
    finally:
        etl.cleanup_temp_tables()

    # attempt to create QuickSight analysis
    qs = QuickSight(
        workgroup=workgroup,
        table_name=etl.output_table_name,
        schema=schema,
        principal=principal,
        source_template=source_template,
    )
    qs.create_data_source()
    qs.create_data_set()
    qs.create_analysis()
def test_init_forecast(forecast_stub, configuration_data):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTNPTS.csv", "some_bucket")
    forecast = config.forecast(dataset_file, "RetailDemandTNPTS")
    dataset_group = config.dataset_group(dataset_file)

    assert (
        forecast._dataset_group.dataset_group_name == dataset_group.dataset_group_name
    )
    assert forecast._forecast_config == config.config_item(dataset_file, "Forecast")
def createdatasetgroup(event, context) -> (Status, str):
    """
    Create / monitor Amazon Forecast dataset group creation

    :param event: lambda event
    :param context: lambda context
    :return: dataset group status and dataset group ARN
    """
    config = Config.from_sfn(event)
    dataset_file = DatasetFile(event.get("dataset_file"), event.get("bucket"))

    dataset_group = config.dataset_group(dataset_file)
    if dataset_group.status == Status.DOES_NOT_EXIST:
        dataset_group.create()

    if dataset_group.status == Status.ACTIVE:
        datasets = config.datasets(dataset_file)
        dataset_group.update(datasets)

    return dataset_group.status, dataset_group.arn
def createforecast(event, context):
    """
    Create / monitor Amazon Forecast forecast creation

    :param event: lambda event
    :param context: lambda context
    :return: forecast / forecast export status and forecast ARN
    """
    config = Config.from_sfn(event)
    dataset_file = DatasetFile(event.get("dataset_file"), event.get("bucket"))
    # config.forecast takes the dataset group name alongside the dataset file
    # at every other call site in this section
    forecast = config.forecast(dataset_file, event.get("dataset_group_name"))
    tracked = forecast

    if forecast.status == Status.DOES_NOT_EXIST:
        # TODO: publish predictor stats to CloudWatch prior to create
        logger.info("Creating forecast for %s" % dataset_file.prefix)
        forecast.create()

    if forecast.status == Status.ACTIVE:
        logger.info("Creating forecast export for %s" % dataset_file.prefix)
        tracked = forecast.export(dataset_file)

    return tracked.status, forecast.arn
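# The four create* handlers in this section share one idempotent shape: create
# the resource only if it does not exist, then report (status, arn) so the Step
# Functions state machine can re-invoke the handler until the resource is
# ready. A hedged sketch of that polling loop as plain Python (in the solution
# itself the retry/wait behaviour lives in the state machine; a FAILED member
# on the Status enum is assumed here):
import time


def wait_until_active(handler, event, poll_seconds=30):
    while True:
        status, arn = handler(event, None)
        if status == Status.ACTIVE:
            return arn
        if status == Status.FAILED:  # assumed enum member
            raise RuntimeError("creation failed for %s" % arn)
        time.sleep(poll_seconds)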
import sys

sys.path.append("../ReinforcementLearning")

import unittest

from shared.grid import Grid
from shared.policy import Policy
from shared.rewards import Rewards
from shared.actions import Actions
from shared.config import Config
from shared.statesandblockers import StatesAndBlocks
from policyevaluation import PolicyEvaluation

test = unittest.TestCase()

config = Config()
policy = Policy()
states_and_blocks = StatesAndBlocks(
    config.positive_terminal_states,
    config.negative_terminal_states,
    config.starting_states,
    config.blocking_states,
)
grid = Grid(config.numer_of_rows, config.number_of_columns, states_and_blocks)
rewards = Rewards(
    config.positive_reward,
    config.negative_reward,
    config.step_reward,
    states_and_blocks,
)

grid.show_grid_positions()

# testing if we get the correct state number based on position
test.assertEqual(grid.get_state_number(0, 0), 0)
test.assertEqual(grid.get_state_number(0, 1), 1)
test.assertEqual(grid.get_state_number(0, 2), 2)
test.assertEqual(grid.get_state_number(0, 3), 3)
test.assertEqual(grid.get_state_number(1, 1), 5)
test.assertEqual(grid.get_state_number(1, 2), 6)
test.assertEqual(grid.get_state_number(2, 3), 11)
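# The assertions above are consistent with row-major state numbering on a grid
# with four columns (e.g. (1, 2) -> 1 * 4 + 2 = 6). A minimal sketch of that
# mapping (hypothetical; the real Grid class may compute it differently, for
# instance around blocking states):
def get_state_number(row: int, column: int, number_of_columns: int = 4) -> int:
    return row * number_of_columns + column


assert get_state_number(1, 2) == 6
assert get_state_number(2, 3) == 11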
def test_dataset_import_job_status_lifecycle_etag(
    configuration_data, forecast_stub, mocker
):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset_import_job = config.dataset_import_job(dataset_file)
    etag = "9d2990c88a30dac1785a09fbb46f3e10"

    # first call - doesn't exist
    forecast_stub.add_response("list_dataset_import_jobs", {"DatasetImportJobs": []})

    # second call - the latest job's ETag tag matches the file in S3
    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "DatasetImportJobArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "DatasetImportJobArn": "arn:2017-1-1",
                },
                {
                    "LastModificationTime": datetime(2016, 1, 1),
                    "DatasetImportJobArn": "arn:2016-1-1",
                },
            ]
        },
    )
    forecast_stub.add_response("describe_dataset_import_job", {"Status": "ACTIVE"})
    forecast_stub.add_response(
        "list_tags_for_resource",
        {"Tags": [{"Key": "SolutionETag", "Value": etag}]},
    )

    # third call - the ETag tag no longer matches the file in S3
    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "DatasetImportJobArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "DatasetImportJobArn": "arn:2017-1-1",
                },
                {
                    "LastModificationTime": datetime(2016, 1, 1),
                    "DatasetImportJobArn": "arn:2016-1-1",
                },
            ]
        },
    )
    forecast_stub.add_response("describe_dataset_import_job", {"Status": "ACTIVE"})
    forecast_stub.add_response(
        "list_tags_for_resource",
        {
            "Tags": [
                {"Key": "SolutionETag", "Value": "9d2990c88a30dac1785a09fbb46f3e11"}
            ]
        },
    )

    dataset_import_job.cli = forecast_stub.client
    mocker.patch(
        "shared.Dataset.dataset_file.DatasetFile.etag",
        new_callable=mocker.PropertyMock,
        return_value=etag,
    )

    assert dataset_import_job.status == Status.DOES_NOT_EXIST

    # simulate finding an active dataset import
    assert dataset_import_job.status == Status.ACTIVE

    # simulate a changed file (different ETag) uploaded
    assert dataset_import_job.status == Status.DOES_NOT_EXIST
def test_dataset_import_job_status_lifecycle_size(
    configuration_data, forecast_stub, mocker
):
    config = Config()
    config.config = configuration_data

    dataset_file = DatasetFile("RetailDemandTRM.csv", "some_bucket")
    dataset_import_job = config.dataset_import_job(dataset_file)
    size = 40

    # first call - doesn't exist
    forecast_stub.add_response("list_dataset_import_jobs", {"DatasetImportJobs": []})

    # second call - the latest job's row count matches the file in S3
    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "DatasetImportJobArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "DatasetImportJobArn": "arn:2017-1-1",
                },
                {
                    "LastModificationTime": datetime(2016, 1, 1),
                    "DatasetImportJobArn": "arn:2016-1-1",
                },
            ]
        },
    )
    forecast_stub.add_response(
        "describe_dataset_import_job",
        {"Status": "ACTIVE", "FieldStatistics": {"item_id": {"Count": size}}},
    )
    forecast_stub.add_response("list_tags_for_resource", {"Tags": []})

    # third call - the row counts no longer match
    forecast_stub.add_response(
        "list_dataset_import_jobs",
        {
            "DatasetImportJobs": [
                {
                    "LastModificationTime": datetime(2015, 1, 1),
                    "DatasetImportJobArn": "arn:2015-1-1",
                },
                {
                    "LastModificationTime": datetime(2017, 1, 1),
                    "DatasetImportJobArn": "arn:2017-1-1",
                },
                {
                    "LastModificationTime": datetime(2016, 1, 1),
                    "DatasetImportJobArn": "arn:2016-1-1",
                },
            ]
        },
    )
    forecast_stub.add_response(
        "describe_dataset_import_job",
        {"Status": "ACTIVE", "FieldStatistics": {"item_id": {"Count": size + 1}}},
    )
    forecast_stub.add_response("list_tags_for_resource", {"Tags": []})

    dataset_import_job.cli = forecast_stub.client
    mocker.patch(
        "shared.Dataset.dataset_file.DatasetFile.size",
        new_callable=mocker.PropertyMock,
        return_value=size,
    )

    assert dataset_import_job.status == Status.DOES_NOT_EXIST

    # simulate finding an active dataset import
    assert dataset_import_job.status == Status.ACTIVE

    # simulate a new dataset (with more lines) uploaded
    assert dataset_import_job.status == Status.DOES_NOT_EXIST
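# Both lifecycle tests above encode the same freshness rule: the latest import
# job only "exists" if it still matches the file in S3 (by row count in one
# variant, by ETag tag in the other); otherwise the status reads DOES_NOT_EXIST
# so that a new import job gets created. A hedged sketch of that decision (the
# helper's shape is illustrative, not the solution's actual method):
def import_job_status(latest_import_matches_file: bool, service_status):
    if not latest_import_matches_file:
        return Status.DOES_NOT_EXIST
    return service_status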
from typing import List

import yaml

# MailServer, Sender, Proxy, Recipient, Message, Config, and the *_INDEX /
# *_FILE_NAME / BUCKET_FOLDER / ATTACHMENTS_FOLDER / INVALID_LINE constants
# are assumed to be imported from elsewhere in the package.


class Reader:
    mail_servers: List[MailServer] = []
    senders: List[Sender] = []
    proxies: List[Proxy] = []
    recipients: List[Recipient] = []
    config = Config()
    message = Message(ATTACHMENTS_FOLDER)

    @staticmethod
    def __create_mail_server(line: str):
        # Split and validate
        parts = line.split(":")
        parts_len = len(parts)
        assert parts_len == 4 or parts_len == 5, "%s: %s" % (INVALID_LINE, line)

        # Create new server object
        server = MailServer()
        server.host = parts[MAIL_SERVERS_HOST_INDEX]
        server.username = parts[MAIL_SERVERS_USERNAME_INDEX]
        server.password = parts[MAIL_SERVERS_PASSWORD_INDEX]
        server.port = int(parts[MAIL_SERVERS_PORT_INDEX])
        if parts_len == 5:
            server.limit = int(parts[MAIL_SERVERS_LIMIT_INDEX])
        return server

    @staticmethod
    def __create_sender(line: str):
        # Split and validate
        parts = line.split(":")
        assert len(parts) == 2, "%s: %s" % (INVALID_LINE, line)

        # Create new sender object
        sender = Sender()
        sender.email = parts[SENDER_MAIL_INDEX]
        sender.name = parts[SENDER_NAME_INDEX]
        return sender

    @staticmethod
    def __create_proxy(line: str):
        # Split and validate
        parts = line.split(":")
        assert len(parts) == 2, "%s: %s" % (INVALID_LINE, line)

        # Create new proxy object
        proxy = Proxy()
        proxy.gateway = parts[PROXIES_GATEWAY_INDEX]
        proxy.port = int(parts[PROXIES_PORT_INDEX])
        return proxy

    @staticmethod
    def __create_recipient(line: str):
        # Split and validate
        parts = line.split(":")
        assert len(parts) == 1, "%s: %s" % (INVALID_LINE, line)

        # Create new recipient object
        recipient = Recipient()
        recipient.email = parts[RECIPIENT_EMAIL_INDEX]
        return recipient

    @staticmethod
    def __should_ignore(line: str):
        # Ignore comments and empty lines
        return not line.strip() or line.startswith("#")

    def read(self):
        print("[ ] Loading files")
        try:
            print("\t- %s" % MAIL_SERVERS_FILE_NAME)
            for line in open("%s/%s" % (BUCKET_FOLDER, MAIL_SERVERS_FILE_NAME), "r"):
                if self.__should_ignore(line):
                    continue
                self.mail_servers.append(self.__create_mail_server(line.rstrip("\n")))

            print("\t- %s" % SENDERS_FILE_NAME)
            for line in open("%s/%s" % (BUCKET_FOLDER, SENDERS_FILE_NAME), "r"):
                if self.__should_ignore(line):
                    continue
                self.senders.append(self.__create_sender(line.rstrip("\n")))

            print("\t- %s" % PROXIES_FILE_NAME)
            for line in open("%s/%s" % (BUCKET_FOLDER, PROXIES_FILE_NAME), "r"):
                if self.__should_ignore(line):
                    continue
                self.proxies.append(self.__create_proxy(line.rstrip("\n")))

            print("\t- %s" % RECIPIENT_FILE_NAME)
            for line in open("%s/%s" % (BUCKET_FOLDER, RECIPIENT_FILE_NAME), "r"):
                if self.__should_ignore(line):
                    continue
                self.recipients.append(self.__create_recipient(line.rstrip("\n")))

            print("\t- %s" % CONFIGS_FILE_NAME)
            with open("%s/%s" % (BUCKET_FOLDER, CONFIGS_FILE_NAME), "r") as stream:
                settings = yaml.safe_load(stream)
                self.config.treads = int(settings.get("treads", self.config.treads))
                self.config.mails_per_second = int(
                    settings.get("mails_per_second", self.config.mails_per_second)
                )
                self.message.subject = settings.get("subject", self.message.subject)

            print("\t- %s" % HTML_FILE_NAME)
            with open("%s/%s" % (BUCKET_FOLDER, HTML_FILE_NAME), "r") as stream:
                self.message.html = stream.read()
        except AssertionError as e:
            print("\n[!] %s" % e)
            exit(1)
        except Exception as e:
            print("\n[!] %s => %s" % (type(e).__name__, e))
            exit(1)

        print("\n[✓] loaded all files")
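# A hedged usage sketch for Reader. The colon-separated record layouts below
# are inferred from the *_INDEX constants and assertions above; the exact field
# order within each file is an assumption:
#
#   mail servers file:  host:username:password:port[:limit]
#   senders file:       email:name
#   proxies file:       gateway:port
#   recipients file:    email
reader = Reader()
reader.read()
print(
    "loaded %d server(s), %d sender(s), %d proxy(ies), %d recipient(s)"
    % (
        len(reader.mail_servers),
        len(reader.senders),
        len(reader.proxies),
        len(reader.recipients),
    )
)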
def test_from_s3_missing_config(s3_missing_config):
    config = Config()
    with pytest.raises(ConfigNotFound):
        config.from_s3(bucket="testbucket")
def test_from_s3(s3_valid_config, configuration_data):
    config = Config.from_s3(bucket="testbucket")
    assert config.config == configuration_data
def test_from_sfn(configuration_data):
    config = Config.from_sfn(event={"config": configuration_data})
    assert config.config == configuration_data