def secure_bucket(self, name, suppressions=None, **kwargs):
    """Create a hardened S3 bucket: S3-managed encryption, all public
    access blocked, RETAIN on stack deletion, and a TLS-only policy.

    Args:
        name: Construct id; also pinned as the CloudFormation logical id.
        suppressions: Optional cfn-nag suppressions to attach to the
            underlying CfnResource.
        **kwargs: Forwarded verbatim to the ``Bucket`` constructor.

    Returns:
        The configured ``Bucket`` construct.
    """
    bucket = Bucket(
        self,
        name,
        removal_policy=RemovalPolicy.RETAIN,
        encryption=BucketEncryption.S3_MANAGED,
        block_public_access=BlockPublicAccess.BLOCK_ALL,
        **kwargs,
    )
    # Deny every request made without TLS. The deny must cover BOTH the
    # bucket ARN (bucket-level calls such as s3:ListBucket) and the object
    # ARNs — the previous policy listed only objects, which left
    # bucket-level operations reachable over plain HTTP.
    bucket.add_to_resource_policy(
        iam.PolicyStatement(
            sid="HttpsOnly",
            resources=[
                bucket.bucket_arn,
                bucket.arn_for_objects("*"),
            ],
            actions=["*"],
            effect=iam.Effect.DENY,
            principals=[iam.AnyPrincipal()],
            conditions={"Bool": {"aws:SecureTransport": False}},
        )
    )
    # Pin the logical id so the bucket keeps a stable name in the template.
    bucket_cfn = bucket.node.default_child  # type: CfnResource
    bucket_cfn.override_logical_id(name)
    if suppressions:
        add_cfn_nag_suppressions(bucket_cfn, suppressions)
    return bucket
def provide_access_to_artifacts(scope: core.Construct, *, pipeline_def: Pipeline, artifact_bucket: aws_s3.Bucket) -> None:
    """Grant s3:Get* on the artifact bucket's objects to external roles.

    Collects role ARNs from two places: the explicit
    ``artifact_access.role_arns`` list in the pipeline definition, and any
    action ``role_arn`` that lives in a different AWS account than the
    current stack. Each collected role gets an ALLOW statement on the
    bucket's resource policy.
    """
    grantees = set(pipeline_def.get("artifact_access", {}).get("role_arns", []))

    for stage in pipeline_def["stages"]:
        for action in stage["actions"]:
            arn = action.get("role_arn")
            if arn is None:
                continue
            # Only cross-account roles need an explicit bucket-policy grant.
            if core.Arn.parse(arn).account != core.Stack.of(scope).account:
                grantees.add(arn)

    for arn in grantees:
        statement = aws_iam.PolicyStatement(
            actions=["s3:Get*"],
            resources=[artifact_bucket.arn_for_objects("*")],
            effect=aws_iam.Effect.ALLOW,
            principals=[aws_iam.ArnPrincipal(arn)],
        )
        artifact_bucket.add_to_resource_policy(statement)
class Anime(core.Stack):
    """CDK stack for the anime service.

    Provisions, in dependency order: an S3 bucket for AniDB title dumps,
    three DynamoDB tables, an SQS queue (with DLQ), Lambda layers built
    from local folders, Lambda functions driven by ``self.lambdas_config``,
    and an HTTP API gateway with a Cognito JWT authorizer.
    """

    def __init__(self, app: core.App, id: str, mal_client_id: str, anidb_client: str, domain_name: str, **kwargs) -> None:
        super().__init__(app, id, **kwargs)
        self.mal_client_id = mal_client_id
        self.anidb_client = anidb_client
        self.domain_name = domain_name
        # name -> LayerVersion / Function, filled by the _create_* helpers.
        self.layers = {}
        self.lambdas = {}
        # Order matters: the lambdas config references tables/queues/buckets,
        # and the gateway references the lambdas.
        self._create_buckets()
        self._create_tables()
        self._create_queues()
        self._create_lambdas_config()
        self._create_layers()
        self._create_lambdas()
        self._create_gateway()

    def _create_buckets(self):
        """Create the bucket that holds AniDB title dumps (expire after 3 days)."""
        self.anidb_titles_bucket = Bucket(
            self,
            "anidb_titles_bucket",
            block_public_access=BlockPublicAccess(
                block_public_acls=True,
                block_public_policy=True,
            ),
            removal_policy=core.RemovalPolicy.DESTROY,
            lifecycle_rules=[
                LifecycleRule(expiration=Duration.days(3)),
            ]
        )

    def _create_tables(self):
        """Create the anime, episodes and params DynamoDB tables with their indexes."""
        self.anime_table = Table(
            self,
            "anime_items",
            partition_key=Attribute(name="id", type=AttributeType.STRING),
            billing_mode=BillingMode.PAY_PER_REQUEST,
        )
        # Lookups by MyAnimeList id and by broadcast day.
        self.anime_table.add_global_secondary_index(
            partition_key=Attribute(name="mal_id", type=AttributeType.NUMBER),
            index_name="mal_id"
        )
        self.anime_table.add_global_secondary_index(
            partition_key=Attribute(name="broadcast_day", type=AttributeType.STRING),
            index_name="broadcast_day"
        )
        self.anime_episodes = Table(
            self,
            "anime_episodes",
            partition_key=Attribute(name="anime_id", type=AttributeType.STRING),
            sort_key=Attribute(name="episode_number", type=AttributeType.NUMBER),
            billing_mode=BillingMode.PAY_PER_REQUEST,
        )
        self.anime_episodes.add_local_secondary_index(
            sort_key=Attribute(name="id", type=AttributeType.STRING),
            index_name="episode_id"
        )
        self.anime_episodes.add_global_secondary_index(
            partition_key=Attribute(name="anidb_id", type=AttributeType.NUMBER),
            index_name="anidb_id"
        )
        self.anime_params = Table(
            self,
            "anime_params",
            partition_key=Attribute(name="name", type=AttributeType.STRING),
            billing_mode=BillingMode.PAY_PER_REQUEST,
        )

    def _create_queues(self):
        """Create the post-anime queue with a dead-letter queue (5 receives)."""
        post_anime_dl = Queue(self, "post_anime_dl")
        self.post_anime_queue = Queue(
            self,
            "anime",
            dead_letter_queue=DeadLetterQueue(max_receive_count=5, queue=post_anime_dl),
            receive_message_wait_time=Duration.seconds(20)
        )

    def _create_lambdas_config(self):
        """Build the per-function config (layers, env vars, IAM policies, sizing).

        Keys follow the ``<parent_folder>-<lambda_folder>`` naming produced by
        ``_create_lambdas``, which looks each function up in this dict.
        """
        self.lambdas_config = {
            "api-anime_by_id": {
                "layers": ["utils", "databases"],
                "variables": {
                    "ANIME_DATABASE_NAME": self.anime_table.table_name,
                    "LOG_LEVEL": "INFO",
                },
                "policies": [
                    PolicyStatement(
                        actions=["dynamodb:GetItem"],
                        resources=[self.anime_table.table_arn]
                    )
                ],
                "timeout": 3,
                "memory": 128
            },
            "api-anime_episodes": {
                "layers": ["utils", "databases"],
                "variables": {
                    "ANIME_EPISODES_DATABASE_NAME": self.anime_episodes.table_name,
                    "LOG_LEVEL": "INFO",
                },
                "policies": [
                    PolicyStatement(
                        actions=["dynamodb:Query"],
                        resources=[self.anime_episodes.table_arn]
                    ),
                    PolicyStatement(
                        actions=["dynamodb:Query"],
                        resources=[f"{self.anime_episodes.table_arn}/index/anidb_id"]
                    )
                ],
                "timeout": 3,
                "memory": 512
            },
            "api-anime_episode": {
                "layers": ["utils", "databases"],
                "variables": {
                    "ANIME_EPISODES_DATABASE_NAME": self.anime_episodes.table_name,
                    "LOG_LEVEL": "INFO",
                },
                "policies": [
                    PolicyStatement(
                        actions=["dynamodb:Query"],
                        resources=[f"{self.anime_episodes.table_arn}/index/episode_id"]
                    )
                ],
                "timeout": 3,
                "memory": 512
            },
            "api-anime": {
                "layers": ["utils", "databases", "api"],
                "variables": {
                    "ANIME_DATABASE_NAME": self.anime_table.table_name,
                    "POST_ANIME_SQS_QUEUE_URL": self.post_anime_queue.queue_url,
                    "LOG_LEVEL": "INFO",
                },
                "policies": [
                    PolicyStatement(
                        actions=["dynamodb:Query"],
                        resources=[f"{self.anime_table.table_arn}/index/mal_id"]
                    ),
                    PolicyStatement(
                        actions=["sqs:SendMessage"],
                        resources=[self.post_anime_queue.queue_arn]
                    ),
                ],
                "timeout": 10,
                "memory": 128
            },
            "crons-titles_updater": {
                "layers": ["utils", "databases", "api"],
                "variables": {
                    "ANIDB_TITLES_BUCKET": self.anidb_titles_bucket.bucket_name,
                    "LOG_LEVEL": "INFO",
                },
                # Cron job: a single concurrent execution avoids overlapping runs.
                "concurrent_executions": 1,
                "policies": [
                    PolicyStatement(
                        actions=["s3:ListBucket"],
                        resources=[self.anidb_titles_bucket.bucket_arn]
                    ),
                    PolicyStatement(
                        actions=["s3:GetObject", "s3:PutObject"],
                        resources=[self.anidb_titles_bucket.arn_for_objects("*")]
                    )
                ],
                "timeout": 120,
                "memory": 128
            },
            "crons-episodes_updater": {
                "layers": ["utils", "databases"],
                "variables": {
                    "LOG_LEVEL": "DEBUG",
                    "POST_ANIME_SQS_QUEUE_URL": self.post_anime_queue.queue_url,
                    "ANIME_DATABASE_NAME": self.anime_table.table_name,
                },
                "concurrent_executions": 1,
                "policies": [
                    PolicyStatement(
                        actions=["dynamodb:Query"],
                        resources=[f"{self.anime_table.table_arn}/index/broadcast_day"]
                    ),
                    PolicyStatement(
                        actions=["sqs:SendMessage"],
                        resources=[self.post_anime_queue.queue_arn]
                    ),
                ],
                "timeout": 120,
                "memory": 128
            },
            "sqs_handlers-post_anime": {
                "layers": ["utils", "databases", "api"],
                "variables": {
                    "ANIME_DATABASE_NAME": self.anime_table.table_name,
                    "ANIME_EPISODES_DATABASE_NAME": self.anime_episodes.table_name,
                    "ANIME_PARAMS_DATABASE_NAME": self.anime_params.table_name,
                    "MAL_CLIENT_ID": self.mal_client_id,
                    "ANIDB_TITLES_BUCKET": self.anidb_titles_bucket.bucket_name,
                    "ANIDB_CLIENT": self.anidb_client,
                    "LOG_LEVEL": "INFO",
                },
                "concurrent_executions": 1,
                "policies": [
                    PolicyStatement(
                        actions=["dynamodb:Query"],
                        resources=[f"{self.anime_table.table_arn}/index/mal_id"]
                    ),
                    PolicyStatement(
                        actions=["dynamodb:UpdateItem"],
                        resources=[self.anime_table.table_arn]
                    ),
                    PolicyStatement(
                        actions=["dynamodb:BatchWriteItem"],
                        resources=[self.anime_episodes.table_arn]
                    ),
                    PolicyStatement(
                        actions=["dynamodb:UpdateItem", "dynamodb:GetItem"],
                        resources=[self.anime_params.table_arn]
                    ),
                    PolicyStatement(
                        actions=["s3:ListBucket"],
                        resources=[self.anidb_titles_bucket.bucket_arn]
                    ),
                    PolicyStatement(
                        actions=["s3:GetObject"],
                        resources=[self.anidb_titles_bucket.arn_for_objects("*")]
                    )
                ],
                "timeout": 60,
                "memory": 2048
            },
        }

    def _create_layers(self):
        """Build each folder under LAYERS_DIR into a Lambda LayerVersion.

        The build folder is recreated from scratch; if a layer ships a
        requirements.txt its packages are pip-installed into the layer's
        python3.8 site-packages path. NOTE(review): runs pip at synth time.
        """
        if os.path.isdir(BUILD_FOLDER):
            shutil.rmtree(BUILD_FOLDER)
        os.mkdir(BUILD_FOLDER)
        for layer in os.listdir(LAYERS_DIR):
            layer_folder = os.path.join(LAYERS_DIR, layer)
            build_folder = os.path.join(BUILD_FOLDER, layer)
            shutil.copytree(layer_folder, build_folder)
            requirements_path = os.path.join(build_folder, "requirements.txt")
            if os.path.isfile(requirements_path):
                # Layout required by the Lambda runtime for Python layers.
                packages_folder = os.path.join(build_folder, "python", "lib", "python3.8", "site-packages")
                subprocess.check_output(["pip", "install", "-r", requirements_path, "-t", packages_folder])
                clean_pycache()
            self.layers[layer] = LayerVersion(
                self,
                layer,
                layer_version_name=f"anime-{layer}",
                code=Code.from_asset(path=build_folder),
                compatible_runtimes=[Runtime.PYTHON_3_8],
            )

    def _create_lambdas(self):
        """Create one Function per ``__init__.py`` found under LAMBDAS_DIR.

        The function name is ``<parent_folder>-<lambda_folder>`` and must have
        a matching entry in ``self.lambdas_config`` (KeyError otherwise).
        Also wires the SQS event source and the two cron rules.
        """
        clean_pycache()
        for root, dirs, files in os.walk(LAMBDAS_DIR):
            for f in files:
                if f != "__init__.py":
                    continue
                parent_folder = os.path.basename(os.path.dirname(root))
                lambda_folder = os.path.basename(root)
                name = f"{parent_folder}-{lambda_folder}"
                lambda_config = self.lambdas_config[name]
                layers = []
                for layer_name in lambda_config["layers"]:
                    layers.append(self.layers[layer_name])
                # Dedicated role per function, least-privilege from the config.
                lambda_role = Role(
                    self,
                    f"{name}_role",
                    assumed_by=ServicePrincipal(service="lambda.amazonaws.com")
                )
                for policy in lambda_config["policies"]:
                    lambda_role.add_to_policy(policy)
                lambda_role.add_managed_policy(
                    ManagedPolicy.from_aws_managed_policy_name("service-role/AWSLambdaBasicExecutionRole"))
                lambda_args = {
                    "code": Code.from_asset(root),
                    "handler": "__init__.handle",
                    "runtime": Runtime.PYTHON_3_8,
                    "layers": layers,
                    "function_name": name,
                    "environment": lambda_config["variables"],
                    "role": lambda_role,
                    "timeout": Duration.seconds(lambda_config["timeout"]),
                    "memory_size": lambda_config["memory"],
                }
                if "concurrent_executions" in lambda_config:
                    lambda_args["reserved_concurrent_executions"] = lambda_config["concurrent_executions"]
                self.lambdas[name] = Function(self, name, **lambda_args)
        self.lambdas["sqs_handlers-post_anime"].add_event_source(SqsEventSource(self.post_anime_queue))
        # Daily crons: titles at 02:10, episodes at 04:10 (gateway-time semantics
        # are whatever EventBridge cron uses — UTC).
        Rule(
            self,
            "titles_updater",
            schedule=Schedule.cron(hour="2", minute="10"),
            targets=[LambdaFunction(self.lambdas["crons-titles_updater"])]
        )
        Rule(
            self,
            "episodes_updater",
            schedule=Schedule.cron(hour="4", minute="10"),
            targets=[LambdaFunction(self.lambdas["crons-episodes_updater"])]
        )

    def _create_gateway(self):
        """Create the HTTP API: custom domain + cert, Cognito JWT authorizer,
        one Lambda-proxy route per entry in ``routes``, a MyAnimeList HTTP
        proxy route, and a throttled ``live`` stage mapped to the domain."""
        cert = Certificate(
            self,
            "certificate",
            domain_name=self.domain_name,
            validation_method=ValidationMethod.DNS
        )
        domain_name = DomainName(
            self,
            "domain",
            domain_name=self.domain_name,
            certificate=cert,
            security_policy=SecurityPolicy.TLS_1_2
        )
        http_api = HttpApi(
            self,
            "anime_gateway",
            create_default_stage=False,
            api_name="anime",
            cors_preflight=CorsPreflightOptions(
                allow_methods=[HttpMethod.GET, HttpMethod.POST],
                allow_origins=["https://moshan.tv", "https://beta.moshan.tv"],
                allow_headers=["authorization", "content-type", "x-mal-client-id"]
            )
        )
        authorizer = CfnAuthorizer(
            self,
            "cognito",
            api_id=http_api.http_api_id,
            authorizer_type="JWT",
            identity_source=["$request.header.Authorization"],
            name="cognito",
            jwt_configuration=CfnAuthorizer.JWTConfigurationProperty(
                audience=["68v5rahd0sdvrmf7fgbq2o1a9u"],
                issuer="https://cognito-idp.eu-west-1.amazonaws.com/eu-west-1_sJ3Y4kSv6"
            )
        )
        # route-id -> HTTP method, path and backing Lambda.
        routes = {
            "get_anime": {
                "method": "GET",
                "route": "/anime",
                "target_lambda": self.lambdas["api-anime"]
            },
            "post_anime": {
                "method": "POST",
                "route": "/anime",
                "target_lambda": self.lambdas["api-anime"]
            },
            "get_anime_by_id": {
                "method": "GET",
                "route": "/anime/{id}",
                "target_lambda": self.lambdas["api-anime_by_id"]
            },
            "get_anime_episodes": {
                "method": "GET",
                "route": "/anime/{id}/episodes",
                "target_lambda": self.lambdas["api-anime_episodes"]
            },
            "post_anime_episode": {
                "method": "POST",
                "route": "/anime/{id}/episodes",
                "target_lambda": self.lambdas["api-anime_episodes"]
            },
            "get_anime_episode": {
                "method": "GET",
                "route": "/anime/{id}/episodes/{episode_id}",
                "target_lambda": self.lambdas["api-anime_episode"]
            },
        }
        for r in routes:
            integration = HttpIntegration(
                self,
                f"{r}_integration",
                http_api=http_api,
                integration_type=HttpIntegrationType.LAMBDA_PROXY,
                integration_uri=routes[r]["target_lambda"].function_arn,
                method=getattr(HttpMethod, routes[r]["method"]),
                payload_format_version=PayloadFormatVersion.VERSION_2_0,
            )
            CfnRoute(
                self,
                r,
                api_id=http_api.http_api_id,
                route_key=f"{routes[r]['method']} {routes[r]['route']}",
                authorization_type="JWT",
                authorizer_id=authorizer.ref,
                target="integrations/" + integration.integration_id
            )
            # Allow API Gateway to invoke the backing function.
            routes[r]["target_lambda"].add_permission(
                f"{r}_apigateway_invoke",
                principal=ServicePrincipal("apigateway.amazonaws.com"),
                source_arn=f"arn:aws:execute-api:{self.region}:{self.account}:{http_api.http_api_id}/*"
            )
        # Straight HTTP proxy to the MyAnimeList API, still behind the JWT authorizer.
        mal_proxy_integration = HttpIntegration(
            self,
            "mal_proxy_integration",
            http_api=http_api,
            integration_type=HttpIntegrationType.HTTP_PROXY,
            integration_uri="https://api.myanimelist.net/v2/{proxy}",
            method=HttpMethod.ANY,
            payload_format_version=PayloadFormatVersion.VERSION_1_0,
        )
        CfnRoute(
            self,
            "mal_proxy_route",
            api_id=http_api.http_api_id,
            route_key="GET /mal_proxy/{proxy+}",
            authorization_type="JWT",
            authorizer_id=authorizer.ref,
            target="integrations/" + mal_proxy_integration.integration_id,
        )
        stage = CfnStage(
            self,
            "live",
            api_id=http_api.http_api_id,
            auto_deploy=True,
            default_route_settings=CfnStage.RouteSettingsProperty(
                throttling_burst_limit=10,
                throttling_rate_limit=5
            ),
            stage_name="live"
        )
        HttpApiMapping(
            self,
            "mapping",
            api=http_api,
            domain_name=domain_name,
            stage=stage
        )
def __init__(self, scope: Construct, id: str, name: str, db: Database, bucket: Bucket, prefix: str, hudi_exclusions: List[str] = None, **kwargs):
    """Create a Glue crawler named ``ara-<name>`` over ``s3://<bucket>/<prefix>``.

    The crawler gets a dedicated role scoped to the given Glue database,
    the S3 prefix it crawls, and its own CloudWatch log stream.

    Args:
        scope: Parent construct.
        id: Construct id.
        name: Base name; crawler is ``ara-<name>``, role ``ara-<name>-crawler``.
        db: Glue database the crawler writes tables into.
        bucket: Bucket holding the data to crawl.
        prefix: Key prefix within the bucket to crawl.
        hudi_exclusions: Optional table folder names to exclude (Hudi tables
            are not supported by the crawler). NOTE(review): annotation would
            ideally be ``Optional[List[str]]`` since the default is None.
        **kwargs: Forwarded to the parent construct.
    """
    super().__init__(scope, id, **kwargs)
    crawler_name = 'ara-' + name
    # Least-privilege role: catalog/database/table access, read-only S3 on
    # the crawled prefix, and its own crawler log group/stream.
    crawler_role = Role(
        self,
        'GenericCrawler',
        role_name=crawler_name + '-crawler',
        assumed_by=ServicePrincipal('glue.amazonaws.com'),
        inline_policies={
            'CrawlerPermissions': PolicyDocument(statements=[
                PolicyStatement(
                    actions=["glue:GetDatabase"],
                    resources=[
                        "arn:aws:glue:{}:{}:catalog".format(
                            Aws.REGION, Aws.ACCOUNT_ID),
                        "arn:aws:glue:{}:{}:database/{}".format(
                            Aws.REGION, Aws.ACCOUNT_ID, db.database_name)
                    ]),
                PolicyStatement(
                    actions=[
                        "glue:BatchCreatePartition", "glue:BatchGetPartition",
                        "glue:GetPartition", "glue:UpdatePartition",
                        "glue:GetTable", "glue:CreateTable", "glue:UpdateTable"
                    ],
                    resources=[
                        "arn:aws:glue:{}:{}:catalog".format(
                            Aws.REGION, Aws.ACCOUNT_ID),
                        "arn:aws:glue:{}:{}:database/{}".format(
                            Aws.REGION, Aws.ACCOUNT_ID, db.database_name),
                        # All tables within the database.
                        "arn:aws:glue:{}:{}:table/{}/*".format(
                            Aws.REGION, Aws.ACCOUNT_ID, db.database_name)
                    ]),
                PolicyStatement(
                    actions=['s3:GetObject'],
                    resources=[bucket.arn_for_objects(prefix + '*')]),
                PolicyStatement(
                    actions=["s3:ListBucket"],
                    resources=[bucket.bucket_arn]),
                PolicyStatement(
                    actions=['logs:CreateLogGroup'],
                    resources=[
                        'arn:aws:logs:{}:{}:log-group:/aws-glue/crawlers*'.format(
                            Aws.REGION, Aws.ACCOUNT_ID)
                    ]),
                PolicyStatement(
                    actions=['logs:CreateLogStream', 'logs:PutLogEvents'],
                    resources=[
                        'arn:aws:logs:{}:{}:log-group:/aws-glue/crawlers:log-stream:{}'.format(
                            Aws.REGION, Aws.ACCOUNT_ID, crawler_name)
                    ])
            ])
        })
    # Excluding hudi tables from the crawler because it's not supported;
    # the _SUCCESS / *crc marker files are always skipped.
    if hudi_exclusions is not None:
        exclusions = ["**_SUCCESS", "**crc"] + list(
            map(lambda x: x + '/**', hudi_exclusions))
    else:
        exclusions = ["**_SUCCESS", "**crc"]
    self.__crawler = CfnCrawler(
        self,
        'Generic',
        name=crawler_name,
        database_name=db.database_name,
        role=crawler_role.role_arn,
        targets={
            "s3Targets": [{
                "path": f'{bucket.bucket_name}/{prefix}',
                "exclusions": exclusions
            }]
        })