def __init__(self, app: core.App, cfn_name: str, stack_env):
    """Define a Lambda function and a two-state Step Functions workflow.

    The construct id passed to the parent is ``f"{cfn_name}-{stack_env}"``.
    The workflow first runs a ``Pass`` state that writes the string
    ``"example"`` to ``$.source`` and then invokes the Lambda function with
    the ``$.time`` and ``$.source`` values of the state input.

    Args:
        app: CDK app used as the scope of this construct.
        cfn_name: Prefix for the ids of every construct created here.
        stack_env: Suffix of this construct's id and of the state machine id.
    """
    super().__init__(scope=app, id=f"{cfn_name}-{stack_env}")

    # Lambda function invoked by the workflow.
    task_function = lambda_.Function(
        self,
        f"{cfn_name}-lambda-task",
        runtime=self.LAMBDA_PYTHON_RUNTIME,
        handler="lambda_handler.lambda_task",
        code=lambda_.AssetCode.from_asset("lambda_script"),
        memory_size=128,
        timeout=core.Duration.seconds(10),
    )

    # First state: inject a constant "source" value into the state data.
    inject_source = sfn.Pass(
        self,
        f"{cfn_name}-sfn-pass",
        comment="pass example",
        input_path="$",
        result=sfn.Result.from_string("example"),
        result_path="$.source",
        output_path="$",
    )

    # Second state: call the Lambda with selected fields of the state data.
    lambda_payload = sfn.TaskInput.from_object({
        "time.$": "$.time",
        "source.$": "$.source",
    })
    run_lambda = sfn_tasks.RunLambdaTask(
        lambda_function=task_function,
        payload=lambda_payload,
    )
    generate_arguments = sfn.Task(
        self,
        f"{cfn_name}-sfn-lambda-task",
        task=run_lambda,
        input_path="$",
        result_path="$.arguments",
        # Only the Lambda payload is forwarded as the state output.
        output_path="$.arguments.Payload",
    )

    # State machine: Pass -> Lambda task.
    sfn.StateMachine(
        self,
        f"{cfn_name}-SFn-{stack_env}",
        definition=inject_source.next(generate_arguments),
    )
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Create a CodeCommit repo, two CodeBuild projects and a state machine.

    The state machine runs a ``Pass`` state followed by one task per
    CodeBuild project. The tasks are produced by ``self.new_codebuild_task``
    and the first project by ``self.new_build_project``; both helpers are
    defined outside this block.

    Args:
        scope: Parent construct.
        construct_id: Id of this construct within ``scope``.
        **kwargs: Extra keyword arguments forwarded to the parent.
    """
    super().__init__(scope, construct_id, **kwargs)

    repo = codecommit.Repository(
        self,
        "repo",
        repository_name="demorepo",
        description="Repo to test PR with stepfunctions",
    )

    proj1 = self.new_build_project(repo, "pr_specs/buildspec.yaml", "proj1")

    proj2 = _codebuild.Project(
        self,
        "proj_name",
        badge=True,
        # The buildspec is read from the source tree, so the project needs a
        # source: CDK rejects a source-less project unless the buildspec is
        # defined inline, and build badges also require a source.
        source=_codebuild.Source.code_commit(repository=repo),
        # NOTE(review): the description looks truncated — confirm the
        # intended text.
        description="Build project for ",
        environment=_codebuild.BuildEnvironment(
            build_image=_codebuild.LinuxBuildImage.STANDARD_5_0,
            compute_type=_codebuild.ComputeType.LARGE,
            privileged=True,
        ),
        project_name="proj_name",
        build_spec=_codebuild.BuildSpec.from_source_filename(
            filename="pr_specs/buildspec2.yaml"),
        timeout=Duration.minutes(10),
    )

    input_task = _step_fn.Pass(self, "passstate")
    proj1_tasks = self.new_codebuild_task(proj1)
    proj2_tasks = self.new_codebuild_task(proj2)

    # Run the two builds one after the other.
    definition = input_task.next(proj1_tasks).next(proj2_tasks)

    _step_fn.StateMachine(
        self,
        "statemachine",
        definition=definition,
        state_machine_name="statemachine",
    )
def __init__(self, scope: core.Construct, construct_id: str, **kwargs) -> None:
    """Create sample messaging resources and two demo state machines.

    Resources: an SQS queue, an SNS topic with an SQS subscription, a
    Kinesis stream, the ``order-machine`` state machine (two ``Pass``
    states) and the ``tea-machine`` state machine (a ``Choice`` on
    ``$.tea``).

    Args:
        scope: Parent construct.
        construct_id: Id of this construct within ``scope``.
        **kwargs: Extra keyword arguments forwarded to the parent.
    """
    super().__init__(scope, construct_id, **kwargs)

    test_queue = sqs.Queue(self, 'test-queue', queue_name='test1')
    test_topic = sns.Topic(self, 'test-topic')
    # NOTE(review): no queue policy is created in this block, so confirm
    # that the topic is actually allowed to deliver to the queue.
    sns.Subscription(
        self,
        'test-subscription',
        topic=test_topic,
        endpoint=test_queue.queue_arn,
        protocol=sns.SubscriptionProtocol.SQS,
    )

    kinesis.Stream(self, 'test-stream',
                   stream_name='donut-sales', shard_count=2)

    # --- order-machine: build an order, then attach the billing amount ---
    create_order = step.Pass(
        self,
        'create-order',
        result=step.Result.from_object({
            "Order": {
                "Customer": "Alice",
                "Product": "Coffee",
                "Billing": {"Price": 10.0, "Quantity": 4.0},
            }
        }),
    )
    calculate_amount = step.Pass(
        self,
        'calculate-amount',
        result=step.Result.from_number(40.0),
        result_path='$.Order.Billing.Amount',
        # Only the billing section is emitted as the final output.
        output_path='$.Order.Billing',
    )
    order_definition = create_order.next(calculate_amount)
    step.StateMachine(
        self,
        'test-state-machine',
        state_machine_name='order-machine',
        definition=order_definition,
    )

    # --- tea-machine: pick a result based on the requested tea ---
    make_tea = step.Choice(
        self, 'make-tea',
        comment='Input should look like {"tea":"green"}')

    green = step.Pass(self, 'green',
                      result=step.Result.from_string('Green tea'))
    make_tea.when(step.Condition.string_equals('$.tea', 'green'), green)

    black = step.Pass(self, 'black',
                      result=step.Result.from_string('Black tea'))
    make_tea.when(step.Condition.string_equals('$.tea', 'black'), black)

    # Fixed copy-paste error: this branch used to return 'Black tea'.
    orange = step.Pass(self, 'orange',
                       result=step.Result.from_string('Orange tea'))
    make_tea.when(step.Condition.string_equals('$.tea', 'orange'), orange)

    # Any other value is reported as bad input.
    error = step.Pass(self, 'error',
                      result=step.Result.from_string('Bad input'))
    make_tea.otherwise(error)

    step.StateMachine(
        self,
        'test-state-machine-2',
        state_machine_name='tea-machine',
        definition=make_tea,
    )
def __init__(self, scope: Construct, construct_id: str, env, **kwargs) -> None:
    """Provision the GuardDuty-to-Network-Firewall automation resources.

    Creates a stateless Network Firewall rule group, a DynamoDB table, an
    IAM role used by two of the Lambda functions, a dead-letter queue, two
    Step Functions state machines and the EventBridge rules that start
    them.

    Args:
        scope: Parent construct.
        construct_id: Id of this construct within ``scope``.
        env: Environment forwarded to the parent; its ``region`` attribute
            is used in the IAM role name.
        **kwargs: Extra keyword arguments forwarded to the parent.
    """
    super().__init__(scope, construct_id, env=env, **kwargs)

    def guarded(task, failure_state):
        """Attach the shared retry/catch policy to ``task`` and return it."""
        task.add_retry(errors=["States.TaskFailed"],
                       interval=Duration.seconds(2),
                       max_attempts=2)
        task.add_catch(errors=["States.ALL"], handler=failure_state)
        return task

    # ---- Network Firewall rule group (built from the inside out) ----
    rule_group_cls = network_fw.CfnRuleGroup
    drop_match = rule_group_cls.MatchAttributesProperty(
        destinations=[
            rule_group_cls.AddressProperty(address_definition="127.0.0.1/32")
        ])
    drop_rule = rule_group_cls.StatelessRuleProperty(
        priority=10,
        rule_definition=rule_group_cls.RuleDefinitionProperty(
            actions=["aws:drop"],
            match_attributes=drop_match))
    stateless_rules = rule_group_cls.StatelessRulesAndCustomActionsProperty(
        stateless_rules=[drop_rule])
    rule_group_config = rule_group_cls.RuleGroupProperty(
        rule_variables=None,
        rules_source=rule_group_cls.RulesSourceProperty(
            stateless_rules_and_custom_actions=stateless_rules))

    rule_group = rule_group_cls(
        self,
        'GuardDutyNetworkFireWallRuleGroup',
        capacity=100,
        rule_group_name='guardduty-network-firewall',
        type='STATELESS',
        description='Guard Duty network firewall rule group',
        tags=[CfnTag(key='Name', value='cfn.rule-group.stack')],
        rule_group=rule_group_config)

    # Dead-letter queue for both EventBridge targets, see
    # https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-rule-dlq.html#dlq-considerations
    target_dlq = sqs.Queue(self, 'DLQStateMachine',
                           queue_name='dlq_state_machine')

    ip_table = ddb.Table(
        self,
        'GuarddutyFirewallDDB',
        table_name='GuardDutyFirewallDDBTable',
        removal_policy=RemovalPolicy.DESTROY,
        partition_key=ddb.Attribute(name='HostIp',
                                    type=ddb.AttributeType.STRING),
        billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

    # ---- IAM role for logging, DynamoDB and rule-group access ----
    lambda_role = iam.Role(
        self,
        'DDBRole',
        role_name=f'ddb-nf-role-{env.region}',
        assumed_by=iam.ServicePrincipal(service='lambda.amazonaws.com'))

    logs_statement = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        resources=["arn:aws:logs:*:*:*"],
        actions=[
            "logs:CreateLogGroup",
            "logs:CreateLogStream",
            "logs:PutLogEvents",
        ])
    lambda_role.add_to_policy(logs_statement)

    table_statement = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        resources=[ip_table.table_arn, f"{ip_table.table_arn}/*"],
        actions=["dynamodb:PutItem", "dynamodb:GetItem", "dynamodb:Scan"])
    lambda_role.add_to_policy(table_statement)

    firewall_statement = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        resources=[rule_group.ref, f"{rule_group.ref}/*"],
        actions=[
            "network-firewall:DescribeRuleGroup",
            "network-firewall:UpdateRuleGroup",
        ])
    lambda_role.add_to_policy(firewall_statement)

    # ---- Step 1: record the remote IP of the finding ----
    record_ip_fn = _lambda.Function(
        self,
        'RecordIpInDB',
        function_name='record-ip-in-ddb',
        runtime=_lambda.Runtime.PYTHON_3_8,
        code=_lambda.Code.from_asset('lambda_fns'),
        handler='addIPToDDB.handler',
        environment={'ACLMETATABLE': ip_table.table_name},
        role=lambda_role)

    # Event-pattern reference:
    # https://docs.amazonaws.cn/en_us/eventbridge/latest/userguide/eb-event-patterns-content-based-filtering.html
    record_ip_payload = step_fn.TaskInput.from_object({
        "comment": "Relevant fields from the GuardDuty / Security Hub finding",
        "HostIp.$": "$.detail.findings[0].ProductFields.aws/guardduty/service/action/networkConnectionAction/remoteIpDetails/ipAddressV4",
        "Timestamp.$": "$.detail.findings[0].ProductFields.aws/guardduty/service/eventLastSeen",
        "FindingId.$": "$.id",
        "AccountId.$": "$.account",
        "Region.$": "$.region",
    })
    record_ip_task = step_fn_task.LambdaInvoke(
        self,
        'RecordIpDDBTask',
        lambda_function=record_ip_fn,
        payload=record_ip_payload,
        result_path='$',
        payload_response_only=True)

    # ---- Step 2: update the firewall rule group ----
    update_firewall_fn = _lambda.Function(
        self,
        'GuardDutyUpdateNetworkFirewallRule',
        function_name='gurdduty-update-networkfirewal-rule-group',
        runtime=_lambda.Runtime.PYTHON_3_8,
        code=_lambda.Code.from_asset('lambda_fns'),
        handler='updateNetworkFireWall.handler',
        environment={
            'FIREWALLRULEGROUP': rule_group.ref,
            'RULEGROUPPRI': '30000',
            'CUSTOMACTIONNAME': 'GuardDutytoFirewall',
            'CUSTOMACTIONVALUE': 'gurdduty-update-networkfirewal-rule-group',
        },
        role=lambda_role)
    update_firewall_task = step_fn_task.LambdaInvoke(
        self,
        'FirewallUpdateRuleTask',
        lambda_function=update_firewall_fn,
        input_path='$',
        result_path='$',
        payload_response_only=True)

    no_change_state = step_fn.Pass(self, 'No Firewall change')
    failure_state = step_fn.Fail(self, 'NotifyFailureJob',
                                 cause='Any Failure', error='Unknown')

    # ---- Step 3: notify Slack after a successful block ----
    slack_alert_fn = _lambda.Function(
        self,
        'SendAlertToSlack',
        function_name='gurdduty-networkfirewal-to-slack',
        runtime=_lambda.Runtime.PYTHON_3_8,
        handler="sendSMSToSlack.handler",
        code=_lambda.Code.from_asset('lambda_fns'))
    slack_alert_task = step_fn_task.LambdaInvoke(
        self,
        'LambdaToSlackDemo',
        lambda_function=slack_alert_fn,
        input_path='$',
        result_path='$')

    new_ip_choice = step_fn.Choice(self, "New IP?")
    block_result_choice = step_fn.Choice(self, "Block sucessfully?")

    # record IP -> (new IP? update firewall -> (failed? fail : slack)
    #               : no change)
    workflow_start = step_fn.Chain.start(
        guarded(record_ip_task, failure_state))
    guarded_update = guarded(update_firewall_task, failure_state)
    after_update = block_result_choice \
        .when(step_fn.Condition.boolean_equals('$.Result', False),
              failure_state) \
        .otherwise(slack_alert_task)
    new_ip_branching = new_ip_choice \
        .when(step_fn.Condition.boolean_equals('$.NewIP', True),
              guarded_update.next(after_update)) \
        .otherwise(no_change_state)
    guardduty_definition = workflow_start.next(new_ip_branching)

    guardduty_machine = step_fn.StateMachine(
        self,
        'GuarddutyStateMachine',
        definition=guardduty_definition,
        timeout=Duration.minutes(5),
        state_machine_name='guardduty-state-machine')

    # Start the workflow for findings that carry a remote IPv4 address.
    remote_ip_pattern = event.EventPattern(
        account=['123456789012'],
        detail_type=["GuardDuty Finding"],
        source=['aws.securityhub'],
        detail={
            "findings": {
                "ProductFields": {
                    "aws/guardduty/service/action/networkConnectionAction/remoteIpDetails/ipAddressV4": [{
                        "exists": True
                    }]
                }
            }
        })
    event.Rule(
        self,
        'EventBridgeCatchIPv4',
        description="Security Hub - GuardDuty findings with remote IP",
        rule_name='guardduty-catch-ipv4',
        event_pattern=remote_ip_pattern,
        targets=[
            event_target.SfnStateMachine(machine=guardduty_machine,
                                         dead_letter_queue=target_dlq)
        ])

    # ---- Second workflow: send other findings to Slack ----
    finding_to_slack_fn = _lambda.Function(
        self,
        'SendFindingToSlack',
        function_name='send-finding-to-slack',
        runtime=_lambda.Runtime.PYTHON_3_8,
        handler="sendFindingToSlack.handler",
        code=_lambda.Code.from_asset('lambda_fns'))

    finding_payload = step_fn.TaskInput.from_object({
        "comment": "Others fields from the GuardDuty / Security Hub finding",
        "severity.$": "$.detail.findings[0].Severity.Label",
        "Account_ID.$": "$.account",
        "Finding_ID.$": "$.id",
        "Finding_Type.$": "$.detail.findings[0].Types",
        "Region.$": "$.region",
        "Finding_description.$": "$.detail.findings[0].Description",
    })
    finding_task = step_fn_task.LambdaInvoke(
        self,
        'SendFindingToSlackTask',
        lambda_function=finding_to_slack_fn,
        payload=finding_payload,
        result_path='$')

    slack_failure_state = step_fn.Fail(self, 'SlackNotifyFailureJob',
                                       cause='Any Failure',
                                       error='Unknown')

    findings_definition = step_fn.Chain.start(
        guarded(finding_task, slack_failure_state))

    findings_machine = step_fn.StateMachine(
        self,
        'SecHubFindingsStateMachine',
        definition=findings_definition,
        timeout=Duration.minutes(5),
        state_machine_name='sechub-finding-state-machine')

    other_findings_pattern = event.EventPattern(
        account=['123456789012'],
        source=['aws.securityhub'],
        detail_type=['Security Hub Findings - Imported'],
        detail={"severity": [5, 8]})
    event.Rule(
        self,
        'EventBridgeFindings',
        description="Security Hub - GuardDuty findings others",
        rule_name='others-findings',
        event_pattern=other_findings_pattern,
        targets=[
            event_target.SfnStateMachine(machine=findings_machine,
                                         dead_letter_queue=target_dlq)
        ])
def __init__(self, scope: core.Construct, id: str, *, start: sfn.IChainable=None) -> None:
    """Initialise the construct and remember its start state.

    Args:
        scope: Parent construct.
        id: Id of this construct within ``scope``.
        start: State to store as ``self.start``. When it is falsy, a new
            ``sfn.Pass`` state with id ``'Pass'`` is created and used.
    """
    super().__init__(scope, id)
    # Fall back to a no-op Pass state when the caller supplies no start.
    self.start = start or sfn.Pass(self, 'Pass')
def smprops(self) -> dict:
    """Return the cached state-machine properties, creating them if needed.

    When ``self._smprops`` is falsy it is replaced by a dict whose
    ``'definition'`` entry is a new ``aws_stepfunctions.Pass`` state with
    id ``'pass'``; the (possibly new) value is then returned.
    """
    if self._smprops:
        return self._smprops

    # Nothing cached yet: build the default single-Pass definition.
    self._smprops = {'definition': aws_stepfunctions.Pass(self, 'pass')}
    return self._smprops
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Build the pizza-ordering state machine and its API entry point.

    The state machine invokes a Lambda on ``$.flavour``, fails when
    ``$.pineappleAnalysis.containsPineapple`` is true and otherwise
    continues to a ``Pass`` state. A second Lambda receives the state
    machine ARN in its environment, is granted permission to start
    executions and is exposed through an API Gateway REST API.

    Args:
        scope: Parent construct.
        id: Id of this construct within ``scope``.
        **kwargs: Extra keyword arguments forwarded to the parent.
    """
    super().__init__(scope, id, **kwargs)

    # Step Function Starts Here

    # The first thing we need to do is see if they are asking for
    # pineapple on a pizza.
    pineapple_check_lambda = _lambda.Function(
        self,
        "pineappleCheckLambdaHandler",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="orderPizza.handler",
        code=_lambda.Code.from_asset("lambda_fns"),
    )

    # Step functions are built up of steps; this is the first one.
    order_pizza = step_fn.Task(
        self,
        'Order Pizza Job',
        task=step_fn_tasks.InvokeFunction(pineapple_check_lambda),
        input_path='$.flavour',
        result_path='$.pineappleAnalysis',
    )

    # Pizza order failure step.
    job_failed = step_fn.Fail(
        self,
        'Sorry, We Dont add Pineapple',
        cause='Failed To Make Pizza',
        error='They asked for Pineapple',
    )

    # If they didn't ask for pineapple, cook the pizza.
    cook_pizza = step_fn.Pass(self, 'Lets make your pizza')

    # If they ask for a pizza with pineapple, fail. Otherwise cook it.
    pineapple_choice = step_fn.Choice(self, 'With Pineapple?') \
        .when(step_fn.Condition.boolean_equals(
            '$.pineappleAnalysis.containsPineapple', True), job_failed) \
        .otherwise(cook_pizza)
    definition = step_fn.Chain.start(order_pizza).next(pineapple_choice)

    state_machine = step_fn.StateMachine(
        self,
        'StateMachine',
        definition=definition,
        timeout=core.Duration.minutes(5),
    )

    # Dead Letter Queue Setup
    dlq = sqs.Queue(self, 'stateMachineLambdaDLQ',
                    visibility_timeout=core.Duration.seconds(300))

    # Lambda that starts the state machine; connected to API Gateway below.
    state_machine_lambda = _lambda.Function(
        self,
        "stateMachineLambdaHandler",
        runtime=_lambda.Runtime.NODEJS_12_X,
        handler="stateMachineLambda.handler",
        code=_lambda.Code.from_asset("lambda_fns"),
        # The queue above was created but never attached to anything;
        # wire it up as this function's dead-letter queue.
        dead_letter_queue=dlq,
        environment={'statemachine_arn': state_machine.state_machine_arn},
    )

    state_machine.grant_start_execution(state_machine_lambda)

    # API Gateway REST API backed by the "state_machine_lambda" function.
    api_gw.LambdaRestApi(self, 'Endpoint', handler=state_machine_lambda)
def __init__(self, app: core.App, cfn_name: str, stack_env):
    """Define a Glue job and a state machine that runs it.

    Uploads the Glue script as an S3 asset, creates the IAM role assumed by
    Glue, defines the Glue job and builds a two-state workflow: a ``Pass``
    state followed by a ``GlueStartJobRun`` task using the ``RUN_JOB``
    integration pattern.

    Args:
        app: CDK app used as the scope of this construct.
        cfn_name: Prefix for the ids of the constructs created here.
        stack_env: Suffix of this construct's id, the role name and the
            state machine id.
    """
    super().__init__(scope=app, id=f"{cfn_name}-{stack_env}")

    # Glue script, uploaded as an asset.
    script_asset = s3_assets.Asset(
        self,
        f"{cfn_name}-glue-script",
        path="./glue_script/glue_job_script.py",
    )
    script_location = (
        f"s3://{script_asset.s3_bucket_name}/{script_asset.s3_object_key}")

    # Role assumed by Glue; the same string is its construct id and name.
    role_identifier = f"glue_s3_access_role_{stack_env}"
    job_role = iam.Role(
        self,
        role_identifier,
        role_name=role_identifier,
        assumed_by=iam.ServicePrincipal("glue.amazonaws.com"),
    )

    # S3 access.
    s3_statement = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        resources=["*"],
        actions=["s3:*"],
    )
    job_role.add_to_policy(s3_statement)

    # CloudWatch Logs access.
    logs_statement = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        resources=["arn:aws:logs:*:*:*"],
        actions=[
            "logs:CreateLogGroup",
            "logs:CreateLogStream",
            "logs:PutLogEvents",
            "logs:DescribeLogStreams",
        ],
    )
    job_role.add_to_policy(logs_statement)

    # Glue job. The name is set explicitly because the deployed name cannot
    # be obtained otherwise; the task below refers to the job by this name.
    glue_job_name = f"{cfn_name}-glue-job"
    job_command = glue.CfnJob.JobCommandProperty(
        # glueetl or pythonshell
        name=self.GLUE_JOB_COMMAND_GLUE_ETL,
        script_location=script_location,
    )
    glue.CfnJob(
        self,
        glue_job_name,
        name=glue_job_name,
        command=job_command,
        # The role is passed by name.
        role=job_role.role_name,
        glue_version=self.GLUE_VERSION_2_0,
        number_of_workers=2,
        worker_type=self.GLUE_WORKER_TYPE_STANDARD,
        # NOTE(review): confirm the unit expected for this value.
        timeout=1800,
    )

    # Step Functions states.
    inject_source = sfn.Pass(
        self,
        f"{cfn_name}-sfn-pass",
        comment="pass example",
        input_path="$",
        result=sfn.Result.from_string("example"),
        result_path="$.source",
        output_path="$",
    )

    # RUN_JOB waits until the job has completed; REQUEST_RESPONSE would
    # move on to the next step without waiting.
    # NOTE(review): output_path selects "$.output" while the result is
    # stored under "$.result" — confirm the input carries an "output" key.
    run_glue_job = sfn_tasks.GlueStartJobRun(
        self,
        f"{cfn_name}-sfn-lambda-task",
        glue_job_name=glue_job_name,
        integration_pattern=sfn.IntegrationPattern.RUN_JOB,
        input_path="$",
        result_path="$.result",
        output_path="$.output",
    )

    # State machine: Pass -> Glue job.
    sfn.StateMachine(
        self,
        f"{cfn_name}-SFn-{stack_env}",
        definition=inject_source.next(run_glue_job),
    )
def __init__(self, scope: core.Construct, id: str, id_checker: str, event_bus: str, stage: Optional[str] = 'prod', **kwargs) -> None:
    """Create the account-application service and its approval workflow.

    Resources: two DynamoDB tables (applications and events, the latter
    with a stream), the service Lambda behind an API Gateway REST API, an
    event-publisher Lambda fed by the events table stream, the
    "open account" state machine and an SSM parameter holding its ARN.

    Args:
        scope: Parent construct.
        id: Base name; combined with ``stage`` for this construct's id and
            for the resource names.
        id_checker: ARN of the identity-checker Lambda function.
        event_bus: Value exposed to the event publisher as
            ``EVENT_BRIDGE_ARN``.
        stage: Deployment stage used in resource names.
        **kwargs: Extra keyword arguments forwarded to the parent.
    """
    super().__init__(scope, '-'.join([id, stage]), **kwargs)

    def command_payload(command, data):
        """Wrap ``command`` and ``data`` in the body shape the Lambdas get."""
        return {'body': {'command': command, 'data': data}}

    string_id_key = ddb.Attribute(name='id', type=ddb.AttributeType.STRING)

    # ---- DynamoDB tables ----
    app_table_name = f'{id}-applications-table-{stage}'
    applications = ddb.Table(
        self,
        id=app_table_name,
        table_name=app_table_name,
        partition_key=string_id_key,
        billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

    events_table_name = f'{id}-events-table-{stage}'
    events = ddb.Table(
        self,
        id=events_table_name,
        table_name=events_table_name,
        partition_key=string_id_key,
        billing_mode=ddb.BillingMode.PAY_PER_REQUEST,
        stream=ddb.StreamViewType.NEW_IMAGE)

    self._table_stream_arn = events.table_stream_arn

    # ---- Lambda function for the bank account service ----
    service_code = lambda_.Code.from_asset('account_application_service')
    handler = lambda_.Function(
        self,
        f'{id}-{stage}-account-application',
        code=service_code,
        runtime=lambda_.Runtime.NODEJS_10_X,
        handler='main.handler',
        environment={
            'ACCOUNTS_TABLE_NAME': applications.table_name,
            'EVENTS_TABLE_NAME': events.table_name,
            'REGION': core.Aws.REGION,
        })

    gw.LambdaRestApi(self, id=f'{stage}-{id}', handler=handler)

    # Grant the service Lambda access to both tables.
    for table in (applications, events):
        table.grant_read_write_data(handler.role)

    shared_statement = iam.PolicyStatement(
        actions=[
            'ssm:Describe*', 'ssm:Get*', 'ssm:List*', 'events:*', 'states:*'
        ],
        effect=iam.Effect.ALLOW,
        resources=['*'])
    handler.add_to_role_policy(statement=shared_statement)

    # ---- Lambda function for the event publisher ----
    stream_source = lambda_es.DynamoEventSource(
        table=events,
        starting_position=lambda_.StartingPosition.LATEST)
    publisher = lambda_.Function(
        self,
        f'{id}-{stage}-event-publisher',
        code=service_code,
        runtime=lambda_.Runtime.NODEJS_10_X,
        handler='event-publisher.handler',
        events=[stream_source],
        environment={
            'EVENT_BRIDGE_ARN': event_bus,
            'REGION': core.Aws.REGION,
        })
    publisher.add_to_role_policy(statement=shared_statement)

    # ---- Step Functions states ----
    approve_application = sf.Task(
        self,
        'Approve Application',
        task=sft.InvokeFunction(
            handler,
            payload=command_payload('APPROVE_ACCOUNT_APPLICATION',
                                    {'id.$': '$.application.id'})),
        result_path='$.approveApplication')

    reject_application = sf.Task(
        self,
        'Reject Application',
        task=sft.InvokeFunction(
            handler,
            payload=command_payload('REJECT_ACCOUNT_APPLICATION',
                                    {'id.$': '$.application.id'})),
        result_path='$.rejectApplication')

    identity_checker = lambda_.Function.from_function_arn(
        self, 'IdentityChecker', function_arn=id_checker)

    check_identity = sf.Task(
        self,
        'Check Identity',
        task=sft.InvokeFunction(
            identity_checker,
            payload=command_payload('CHECK_IDENTITY',
                                    {'application.$': '$.application'})))

    # Human review: pause on a task token, then branch on the decision.
    review_task = sf.Task(
        self,
        'Wait for Human Review',
        task=sft.RunLambdaTask(
            handler,
            integration_pattern=sf.ServiceIntegrationPattern.WAIT_FOR_TASK_TOKEN,
            payload=command_payload(
                'FLAG_ACCOUNT_APPLICATION_FOR_HUMAN_REVIEW',
                {
                    'id.$': '$.application.id',
                    'taskToken': sf.Context.task_token,
                })),
        result_path='$.humanReview')
    review_decision = sf.Choice(self, 'Human Approval Choice')
    review_decision.when(
        sf.Condition.string_equals('$.humanReview.decision', 'APPROVE'),
        next=approve_application)
    review_decision.when(
        sf.Condition.string_equals('$.humanReview.decision', 'REJECT'),
        next=reject_application)
    wait_for_human_review = review_task.next(review_decision)

    # Automated checks run in parallel; either flag triggers human review.
    automated_checks = sf.Parallel(self, 'Perform Automated Checks',
                                   result_path='$.checks')
    automated_checks.branch(check_identity)
    automated_checks.branch(
        sf.Pass(self, 'Check Fraud Model',
                result=sf.Result({'flagged': False})))

    checks_outcome = sf.Choice(self, 'Automated Checks Choice')
    for flag_path in ('$.checks[0].flagged', '$.checks[1].flagged'):
        checks_outcome.when(
            sf.Condition.boolean_equals(flag_path, True),
            next=wait_for_human_review)
    checks_outcome.otherwise(approve_application)

    sm_definition = automated_checks.next(checks_outcome)

    state_machine = sf.StateMachine(
        self, 'OpenAccountStateMachine' + stage, definition=sm_definition)

    # Publish the state machine ARN as an SSM parameter.
    ssm.CfnParameter(
        self,
        id='StateMachineArnSSM',
        type='String',
        value=state_machine.state_machine_arn,
        name='StateMachineArnSSM')
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Create an Athena-over-CloudFront-logs workflow.

    Resources: two S3 buckets (access logs and Athena results), a Glue
    database and external table describing CloudFront access logs, and a
    state machine that starts an Athena query, fetches its results, maps
    over the rows and loops while a ``NextToken`` is present. Three stack
    outputs expose the log bucket name and the state machine name and ARN.

    Args:
        scope: Parent construct.
        construct_id: Id of this construct within ``scope``.
        **kwargs: Extra keyword arguments forwarded to the parent.
    """
    super().__init__(scope, construct_id, **kwargs)

    # The S3 bucket where CloudFront access logs will be stored.
    cf_access_logs = s3.Bucket(self, "LogBucket")

    # S3 bucket where Athena will put the results.
    athena_results = s3.Bucket(self, "AthenaResultsBucket")

    # Create an Athena database.
    # NOTE(review): ``account`` is not defined in this block; presumably
    # it is provided at module level — confirm.
    glue_database_name = "serverlessland_database"
    glue_database = glue.CfnDatabase(
        self,
        id=glue_database_name,
        catalog_id=account,
        database_input=glue.CfnDatabase.DatabaseInputProperty(
            description=f"Glue database '{glue_database_name}'",
            name=glue_database_name,
        ),
    )

    # Table with the structure of CloudFront logs, see
    # https://docs.aws.amazon.com/athena/latest/ug/cloudfront-logs.html
    column_specs = [
        ('date', 'date'),
        ('time', 'string'),
        ('location', 'string'),
        ('bytes', 'bigint'),
        ('request_ip', 'string'),
        ('method', 'string'),
        ('host', 'string'),
        ('uri', 'string'),
        ('status', 'string'),
        ('referer', 'string'),
        ('user_agent', 'string'),
        ('query_string', 'string'),
        ('cookie', 'string'),
        ('result_type', 'string'),
        ('request_id', 'string'),
        ('host_header', 'string'),
        ('request_protocol', 'string'),
        ('request_bytes', 'bigint'),
        ('time_taken', 'float'),
        ('xforwarded_for', 'string'),
        ('ssl_protocol', 'string'),
        ('ssl_cipher', 'string'),
        ('response_result_type', 'string'),
        ('http_version', 'string'),
        ('fle_status', 'string'),
        ('fle_encrypted_fields', 'int'),
        ('c_port', 'int'),
        ('time_to_first_byte', 'float'),
        ('x_edge_detailed_result_type', 'string'),
        ('sc_content_type', 'string'),
        ('sc_content_len', 'string'),
        ('sc_range_start', 'bigint'),
        ('sc_range_end', 'bigint'),
    ]
    athena_table = glue.CfnTable(
        self,
        id='cfaccesslogs',
        catalog_id=account,
        database_name=glue_database_name,
        table_input=glue.CfnTable.TableInputProperty(
            name='cf_access_logs',
            description='CloudFront access logs',
            table_type='EXTERNAL_TABLE',
            parameters={
                'skip.header.line.count': '2',
            },
            storage_descriptor=glue.CfnTable.StorageDescriptorProperty(
                location="s3://" + cf_access_logs.bucket_name + "/",
                input_format='org.apache.hadoop.mapred.TextInputFormat',
                output_format='org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
                compressed=False,
                serde_info=glue.CfnTable.SerdeInfoProperty(
                    serialization_library='org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe',
                    parameters={
                        # CloudFront standard logs are tab-separated;
                        # spell the delimiter out as an escape so it
                        # cannot be confused with a space.
                        'field.delim': '\t',
                    },
                ),
                columns=[
                    glue.CfnTable.ColumnProperty(name=col_name, type=col_type)
                    for col_name, col_type in column_specs
                ],
            ),
        ),
    )
    # The table names its database by plain string, which gives
    # CloudFormation no ordering information. Declare the dependency
    # explicitly so the database always exists before the table.
    athena_table.node.add_dependency(glue_database)

    # Submit the query and wait for the results.
    start_query_execution_job = tasks.AthenaStartQueryExecution(
        self,
        "Start Athena Query",
        query_string="SELECT uri FROM cf_access_logs limit 10",
        # Executes the command in SYNC mode.
        integration_pattern=sf.IntegrationPattern.RUN_JOB,
        query_execution_context=tasks.QueryExecutionContext(
            database_name=glue_database_name,
        ),
        result_configuration=tasks.ResultConfiguration(
            output_location=s3.Location(
                bucket_name=athena_results.bucket_name,
                object_key="results",
            ),
        ),
    )

    # Get the results.
    # NOTE(review): no next_token is passed to this task, so the loop
    # below appears to request the same page again — confirm.
    get_query_results_job = tasks.AthenaGetQueryResults(
        self,
        "Get Query Results",
        query_execution_id=sf.JsonPath.string_at(
            "$.QueryExecution.QueryExecutionId"),
        result_path=sf.JsonPath.string_at("$.GetQueryResults"),
    )

    # Prepare the query to see if more results are available
    # (up to 1000 can be retrieved).
    prepare_next_params = sf.Pass(
        self,
        "Prepare Next Query Params",
        parameters={
            "QueryExecutionId.$": "$.StartQueryParams.QueryExecutionId",
            "NextToken.$": "$.GetQueryResults.NextToken",
        },
        result_path=sf.JsonPath.string_at("$.StartQueryParams"),
    )

    # Check to see if more results are available.
    has_more_results = sf.Choice(self, "Has More Results?").when(
        sf.Condition.is_present("$.GetQueryResults.NextToken"),
        prepare_next_params.next(get_query_results_job),
    ).otherwise(sf.Succeed(self, "Done"))

    # Do something with each result row (the first row is skipped by the
    # [1:] slice); add your own logic in the iterator. The local is not
    # called ``map`` so the builtin is not shadowed.
    process_rows = sf.Map(
        self,
        "Map State",
        max_concurrency=1,
        input_path=sf.JsonPath.string_at(
            "$.GetQueryResults.ResultSet.Rows[1:]"),
        result_path=sf.JsonPath.DISCARD,
    )
    process_rows.iterator(sf.Pass(self, "DoSomething"))

    # Step function to orchestrate the Athena query and result retrieval.
    workflow = sf.StateMachine(
        self,
        "AthenaQuery",
        definition=start_query_execution_job
        .next(get_query_results_job)
        .next(process_rows)
        .next(has_more_results),
        timeout=Duration.minutes(60),
    )

    CfnOutput(self, "Logs",
              value=cf_access_logs.bucket_name,
              export_name='LogsBucket')
    CfnOutput(self, "SFName",
              value=workflow.state_machine_name,
              export_name='SFName')
    CfnOutput(self, "SFArn",
              value=workflow.state_machine_arn,
              export_name='StepFunctionArn',
              description='Step Function arn')
def create_state_machine(self, services):
    """Build and return the multipage PDF/A2I state machine.

    The workflow branches on ``$.extension``: ``pdf`` inputs go through the
    PNG-extraction Lambda, ``png`` and ``jpg`` inputs through a ``Pass``
    state that sets ``$.image_keys`` to ``["single_image"]``. Every image
    key is then sent to the Textract queue from a ``Map`` state, and the
    wrap-up Lambda runs last.

    Args:
        services: Mapping read at ``services["lambda"]["pngextract"]``,
            ``services["lambda"]["wrapup"]`` and
            ``services["textract_sqs"]``.

    Returns:
        The created ``aws_stepfunctions.StateMachine``.
    """
    sfn = aws_stepfunctions
    sfn_tasks = aws_stepfunctions_tasks

    # PDF branch: split the document into PNG images.
    extract_pngs = sfn_tasks.LambdaInvoke(
        self,
        "PDF. Conver to PNGs",
        lambda_function=services["lambda"]["pngextract"],
        payload_response_only=True,
        result_path="$.image_keys",
    )

    # Final step.
    wrapup = sfn_tasks.LambdaInvoke(
        self,
        "Wrapup and Clean",
        lambda_function=services["lambda"]["wrapup"],
    )

    # Per-image step: enqueue the work item together with the task token
    # and wait for the token to be returned.
    queue_message = sfn.TaskInput.from_object({
        "token": sfn.Context.task_token,
        "id.$": "$.id",
        "bucket.$": "$.bucket",
        "key.$": "$.key",
        "wip_key.$": "$.wip_key",
    })
    send_to_textract = sfn_tasks.SqsSendMessage(
        self,
        "Perform Textract and A2I",
        queue=services["textract_sqs"],
        message_body=queue_message,
        delay=None,
        integration_pattern=sfn.ServiceIntegrationPattern.WAIT_FOR_TASK_TOKEN,
    )

    # Fan out over the image keys; each item becomes the "wip_key".
    per_image_map = sfn.Map(
        self,
        "Process_Map",
        items_path="$.image_keys",
        result_path="DISCARD",
        parameters={
            "id.$": "$.id",
            "bucket.$": "$.bucket",
            "key.$": "$.key",
            "wip_key.$": "$$.Map.Item.Value",
        },
    ).iterator(send_to_textract)

    # Image branch: a single placeholder key.
    single_image = sfn.Pass(
        self,
        "Image. Passing.",
        result=sfn.Result.from_array(["single_image"]),
        result_path="$.image_keys",
    )

    # Route by file extension.
    by_extension = sfn.Choice(self, "PDF or Image?")
    routes = (
        ("pdf", extract_pngs),
        ("png", single_image),
        ("jpg", single_image),
    )
    for extension, target in routes:
        by_extension.when(
            sfn.Condition.string_equals("$.extension", extension), target)

    # Create the Step Functions state machine.
    workflow = by_extension.afterwards().next(per_image_map).next(wrapup)
    return sfn.StateMachine(
        self,
        "multipagepdfa2i_stepfunction",
        state_machine_name="multipagepdfa2i_stepfunction",
        definition=workflow,
    )