#!/usr/bin/env python
from diagrams import Diagram
from diagrams.aws.compute import Lambda
from diagrams.aws.ml import Personalize
from diagrams.aws.analytics import KinesisDataStreams, KinesisDataFirehose, Athena, Quicksight, Glue
from diagrams.onprem.client import Client
from diagrams.aws.storage import S3

with Diagram('Event Streaming'):
    # Instantiating a class creates a node.
    # Nodes can be given labels; a \n inside the label inserts a line break.
    stream = KinesisDataStreams('Kinesis\nData Streams')
    s3 = S3('S3')
    athena = Athena('Athena')

    # Build flows starting from the nodes defined above.
    # Nodes can also be created inline, without assigning them to a variable.
    Client() >> stream >> Lambda('Lambda') >> Personalize('Personalize\nEventTracker')
    stream >> KinesisDataFirehose('Kinesis\nData Firehose') >> s3
    s3 - athena >> Quicksight('QuickSight') << Client()
    s3 >> Glue('Glue') >> athena
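# A minimal, self-contained sketch (node names and file names here are illustrative,
# not taken from the example above) showing the three edge operators used throughout
# these snippets together with the output options Diagram accepts:
# >> draws a directed edge, << reverses the arrow, and - draws an undirected edge.
from diagrams import Diagram
from diagrams.aws.compute import Lambda
from diagrams.aws.storage import S3
from diagrams.onprem.client import Client

with Diagram('operator-demo', filename='operator_demo', outformat='png', show=False):
    client = Client('Client')
    fn = Lambda('Handler')
    bucket = S3('Bucket')

    client >> fn >> bucket        # directed edges, left to right
    bucket << Client('Reader')    # << reverses the arrow direction
    client - bucket               # - draws an undirected edge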
from diagrams import Cluster, Diagram
from diagrams.aws.analytics import (
    Athena,
    Glue,
    GlueCrawlers,
    GlueDataCatalog,
    Kinesis,
    KinesisDataFirehose,
    Quicksight,
)
from diagrams.aws.compute import Lambda
from diagrams.aws.iot import IotRule
from diagrams.aws.ml import SagemakerNotebook, SagemakerTrainingJob, SagemakerModel
from diagrams.aws.network import Endpoint
from diagrams.aws.storage import S3

with Diagram("AWS ML Lab", show=False):
    iot = IotRule("Engine devices")
    inference = Kinesis("real-time")
    source = KinesisDataFirehose("batches")
    quicksight = Quicksight("dashboard")

    with Cluster("VPC"):
        with Cluster("Training"):
            submissions = S3("Submissions")
            curated = S3("CuratedData")
            submissions_crawler = GlueCrawlers("submissions crawler")
            curated_crawler = Glue("ETL")
            ctas = Athena("train/eval split")
            catalog = GlueDataCatalog("data catalog")
            notebooks = SagemakerNotebook("Build Model")
            job = SagemakerTrainingJob("Train Model")
            model = SagemakerModel("Fitted Model")

        with Cluster("Inference"):
            endpointLambda = Lambda("call endpoint")
            with Cluster("Multi AZ endpoints") as az:
                endpoints = [
                    Endpoint("us-east-1a"),
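# The excerpt above breaks off inside the multi-AZ endpoint list. A minimal,
# self-contained sketch (node names are illustrative, not from the excerpt) of
# the fan-out pattern it is building toward: connecting one node to a Python
# list of nodes draws an edge to every member of the list.
from diagrams import Cluster, Diagram
from diagrams.aws.compute import EC2, Lambda

with Diagram("fan-out-demo", show=False):
    caller = Lambda("call endpoint")
    with Cluster("Multi AZ endpoints"):
        targets = [EC2("az-a"), EC2("az-b")]
    caller >> targets  # one edge per list member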
with Cluster("Hadoop", graph_attr=graph_attr):
    beam = Beam("\nBeam")
    airflow = Airflow("\nAirflow")
    sqoop = Hadoop("\nSqoop")
    beam - [airflow] - sqoop
    pig = Hadoop("\nPig")
    hive = Hive("\nHive")
    nifi = Nifi("\nNiFi")
    pig - [hive] - nifi

with Cluster("Classic ETL", graph_attr=graph_attr):
    sql = Mssql("\nSSIS")
    abinitio = DataTransfer("\nAb Initio")
    embulk = Embulk("\nEmbulk")
    informatica = DataTransfer("\nInformatica")
    embulk - [abinitio]
    sql - [informatica]

with Cluster("Cloud", graph_attr=graph_attr):
    with Cluster("Azure", graph_attr=graph_attr):
        adf = DataFactories("\nData\nFactory")
        azdatabricks = Databricks("\nDatabricks")
        polybase = SQLDatawarehouse("\nPolyBase\nin Synapse")
        fn = FunctionApps("\nAzure\nFunctions")
        adf - [azdatabricks]
        polybase - [fn]

    with Cluster("AWS", graph_attr=graph_attr):
        glue = Glue("\nGlue")
        awsdatabricks = Databricks("\nDatabricks")
        l = Lambda("\nLambda")
        glue - [awsdatabricks]
        l
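# The fragment above references a graph_attr dict that is defined elsewhere in
# its source; its keys are ordinary Graphviz attributes passed through to the
# diagram and cluster subgraphs. A minimal, self-contained sketch with an
# assumed attribute set (the actual values used by the original are not shown):
from diagrams import Cluster, Diagram
from diagrams.onprem.analytics import Hadoop, Hive

graph_attr = {
    "fontsize": "14",          # assumed styling values
    "bgcolor": "transparent",
}

with Diagram("cluster-style-demo", show=False, graph_attr=graph_attr):
    with Cluster("Hadoop", graph_attr=graph_attr):
        Hadoop("\nPig") - Hive("\nHive")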
from diagrams import Cluster, Diagram
from diagrams.aws.analytics import EMRCluster, Glue, GlueDataCatalog
from diagrams.aws.compute import ECS, Lambda
from diagrams.aws.database import Dynamodb
from diagrams.aws.ml import Sagemaker
from diagrams.aws.network import APIGateway
from diagrams.aws.storage import S3
from diagrams.onprem.analytics import Spark
from diagrams.onprem.client import User
from diagrams.onprem.compute import Server
from diagrams.onprem.network import Internet

with Diagram(name="", show=False, direction="LR",
             filename="setup/architecture", graph_attr={"dpi": "300"}) as diag:
    with Cluster("Source"):
        source = Server("HTTP")

    with Cluster("Data load"):
        storage = S3("Data storage")
        download = ECS("ECS download task")
        unzip_trigger = Lambda("S3 event trigger")
        unzip = ECS("ECS unzip task")

    with Cluster("Data processing"):
        parse = Glue("Glue Spark XML\nparser")
        catalog = GlueDataCatalog("Data Catalog")

    with Cluster("Feature engineering"):
        train_features = Glue("Glue Spark job:\ntrain features")
        predict_features = Glue("Glue Spark job:\nprediction features")
        prediction_db = Dynamodb("Prediction database")

    with Cluster("ML model"):
        cluster = EMRCluster("EMR Cluster")
        model_fit = Spark("Spark + MLeap")
        model = Sagemaker("Sagemaker endpoint")

    with Cluster("Serving"):
        app = Internet("Web app")
        api = APIGateway("REST API")
        predict_trigger = Lambda("Request/response\nhandler")
        user = User("End user")
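# The excerpt above only declares nodes; the edges connecting them are not part
# of this excerpt. A hypothetical wiring of just the serving path, using only the
# operators shown earlier (the flow itself is an assumption, not taken from the
# original):
from diagrams import Cluster, Diagram
from diagrams.aws.compute import Lambda
from diagrams.aws.ml import Sagemaker
from diagrams.aws.network import APIGateway
from diagrams.onprem.client import User
from diagrams.onprem.network import Internet

with Diagram("serving-path-sketch", show=False, direction="LR"):
    with Cluster("Serving"):
        app = Internet("Web app")
        api = APIGateway("REST API")
        predict_trigger = Lambda("Request/response\nhandler")
    model = Sagemaker("Sagemaker endpoint")
    user = User("End user")

    # hypothetical request/response flow
    user >> app >> api >> predict_trigger >> model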
from diagrams import Cluster, Diagram, Edge
from diagrams.aws.analytics import (Athena, Glue, GlueCrawlers,
                                    GlueDataCatalog, Quicksight)
from diagrams.aws.storage import S3

with Diagram("demo-diagram", show=True):
    with Cluster('ETL - csv to parquet with data-type fine tune'):
        raw_data = S3('raw nyc-tlc-data')
        elt = Glue('Glue Job')
        parquet_data = S3('Parquet')
        raw_data >> elt >> parquet_data

    with Cluster('Exploratory Data Analysis'):
        crawler = GlueCrawlers('Crawler')
        catalog = GlueDataCatalog('DataCatalog')
        eda = Athena('Athena')
        parquet_data >> crawler >> catalog >> eda

    with Cluster('ETL - Filter outliers'):
        etl = Glue('Glue Job')
        clear_data = S3('DataCube')
        crawler = GlueCrawlers('Crawler')
        catalog = GlueDataCatalog('DataCatalog')
        engine = Athena('Athena')
        dashboard = Quicksight('Dashboard')
        parquet_data >> etl >> clear_data
        clear_data >> crawler >> catalog >> engine >> dashboard

    insight = Edge(label='Insight', style="dashed")
    eda >> insight >> etl
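# Edge objects can also be written inline, and accept Graphviz edge attributes
# such as label, color, and style. A minimal, self-contained sketch of the same
# dashed "Insight" link in inline form (the color value is illustrative):
from diagrams import Diagram, Edge
from diagrams.aws.analytics import Athena, Glue

with Diagram("edge-attr-demo", show=False):
    eda = Athena("Athena")
    etl = Glue("Glue Job")
    eda >> Edge(label="Insight", style="dashed", color="firebrick") >> etl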
with Cluster("Private Subnets") as priv1:
    Blank("")
    singer1 = Singer("Singer.io\nExtract/Loads\n(ECS Fargate)")
    Blank("")
    Blank("")
    Blank("")
    # with Cluster("Private Subnet (2)") as priv2:
    tableau = Tableau("Tableau Server\n(EC2)")

with Cluster("S3 Data Lake"):
    s3data = storage.S3("Data Bucket")
    s3meta = storage.S3("Metadata Bucket")
    s3logs = storage.S3("Logging Bucket")

sftp = TransferForSftp("SFTP\nTransfer Service")
py_fn1 = compute.Lambda("File Listener\n(Lambda Python)")
glue = Glue("Spark Transforms\n(Glue)")
# with Cluster("AWS Serverless"):
events = Eventbridge("Event Triggers\n(AWS Eventbridge)")
secrets = security.SecretsManager("AWS Secrets\nManager")
cw = Cloudwatch("Cloudwatch Logs")
source = Internet("External\nData Source")

py_fn1 << s3data << py_fn1
glue << s3data << glue
nat << singer1
nat >> source
elb >> tableau
s3meta >> singer1 >> s3data
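# The fragment above calls storage.S3, compute.Lambda, and security.SecretsManager,
# which implies its source imports the provider sub-packages as modules rather than
# importing each class (the nat and elb nodes it also references are defined outside
# this excerpt). A minimal, self-contained sketch of that import style; the edge at
# the end is illustrative only:
from diagrams import Diagram
from diagrams.aws import compute, security, storage

with Diagram("module-import-demo", show=False):
    bucket = storage.S3("Data Bucket")
    fn = compute.Lambda("File Listener")
    secrets = security.SecretsManager("AWS Secrets\nManager")

    secrets >> fn >> bucket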