Example #1
#!/usr/bin/env python
from diagrams import Diagram
from diagrams.aws.compute import Lambda
from diagrams.aws.ml import Personalize
from diagrams.aws.analytics import KinesisDataStreams, KinesisDataFirehose, Athena, Quicksight, Glue
from diagrams.onprem.client import Client
from diagrams.aws.storage import S3

with Diagram('Event Streaming'):
    # Create nodes by instantiating the node classes
    # Nodes can be given labels; inserting \n adds a line break in the label
    stream = KinesisDataStreams('Kinesis\nData Streams')
    s3 = S3('S3')
    athena = Athena('Athena')

    # Create flows starting from the nodes defined above
    # (a short standalone sketch of these flow operators follows this example)
    # Nodes can also be created inline, without assigning them to a variable
    Client() >> stream >> Lambda('Lambda') >> Personalize('Personalize\nEventTracker')
    stream >> KinesisDataFirehose('Kinesis\nData Firehose') >> s3
    s3 - athena >> Quicksight('QuickSight') << Client()
    s3 >> Glue('Glue') >> athena
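The comments in this first example cover the core of the diagrams API: instantiate a node class to create a node, use \n inside the label for line breaks, and chain nodes with the flow operators. The following minimal, self-contained sketch (node labels and the output filename are illustrative, not taken from the snippet above) shows how the operators behave:

from diagrams import Diagram
from diagrams.aws.compute import Lambda
from diagrams.aws.storage import S3
from diagrams.onprem.client import Client

# Minimal sketch of the flow operators; labels and filename are illustrative.
with Diagram("operator demo", show=False, filename="operator_demo"):
    client = Client("client")
    handler = Lambda("handler")
    results = S3("results")
    logs = S3("logs")

    client >> handler              # >> draws a directed edge from left to right
    handler >> [results, logs]     # a list fans a single statement out to several nodes
    client << logs                 # << draws the edge in the opposite direction (logs -> client)
    results - logs                 # -  draws an undirected edge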
Example #2
# Imports for the node classes used below; module paths follow the diagrams AWS providers.
from diagrams import Cluster, Diagram
from diagrams.aws.analytics import (
    Athena,
    Glue,
    GlueCrawlers,
    GlueDataCatalog,
    Kinesis,
    KinesisDataFirehose,
    Quicksight,
)
from diagrams.aws.compute import Lambda
from diagrams.aws.iot import IotRule
from diagrams.aws.network import Endpoint
from diagrams.aws.storage import S3
from diagrams.aws.ml import SagemakerNotebook, SagemakerTrainingJob, SagemakerModel

with Diagram("AWS ML Lab", show=False):
    iot = IotRule("Engine devices")
    inference = Kinesis("real-time")
    source = KinesisDataFirehose("batches")
    quicksight = Quicksight("dashboard")
    with Cluster("VPC"):
        with Cluster("Training"):
            submissions = S3("Submissions")
            curated = S3("CuratedData")

            submissions_crawler = GlueCrawlers("submissions crawler")
            curated_crawler = Glue("ETL")

            ctas = Athena("train/eval split")

            catalog = GlueDataCatalog("data catalog")

            notebooks = SagemakerNotebook("Build Model")
            job = SagemakerTrainingJob("Train Model")
            model = SagemakerModel("Fitted Model")

        with Cluster("Inference"):

            endpointLambda = Lambda("call endpoint")
            with Cluster("Multi AZ endpoints") as az:
                endpoints = [
                    Endpoint("us-east-1a"),
Example #3
        with Cluster("Hadoop", graph_attr=graph_attr):
            beam = Beam("\nBeam")
            airflow = Airflow("\nAirflow")
            sqoop = Hadoop("\nSqoop")
            beam - [airflow] - sqoop
            pig = Hadoop("\nPig")
            hive = Hive("\nHive")
            nifi = Nifi("\nNiFi")
            pig - [hive] - nifi
        with Cluster("Classic ETL", graph_attr=graph_attr):
            sql = Mssql("\nSSIS")
            abinitio = DataTransfer("\nAb Initio")
            embulk = Embulk("\nEmbulk")
            informatica = DataTransfer("\nInformatica")
            embulk - [abinitio]
            sql - [informatica]

    with Cluster("Cloud", graph_attr=graph_attr):
        with Cluster("Azure", graph_attr=graph_attr):
            adf = DataFactories("\nData\nFactory")
            azdatabricks = Databricks("\nDatabricks")
            polybase = SQLDatawarehouse("\nPolyBase\nin Synapse")
            fn = FunctionApps("\nAzure\nFunctions")
            adf - [azdatabricks]
            polybase - [fn]
        with Cluster("AWS", graph_attr=graph_attr):
            glue = Glue("\nGlue")
            awsdatabricks = Databricks("\nDatabricks")
            l = Lambda("\nLambda")
            glue - [awsdatabricks]
            l
Example #4
# Imports for the node classes used below; module paths follow the diagrams providers.
from diagrams import Cluster, Diagram
from diagrams.aws.analytics import EMRCluster, Glue, GlueDataCatalog
from diagrams.aws.compute import ECS, Lambda
from diagrams.aws.database import Dynamodb
from diagrams.aws.ml import Sagemaker
from diagrams.aws.network import APIGateway
from diagrams.aws.storage import S3
from diagrams.onprem.analytics import Spark
from diagrams.onprem.client import User
from diagrams.onprem.compute import Server
from diagrams.onprem.network import Internet

with Diagram(name="", show=False, direction="LR",
             filename="setup/architecture",
             graph_attr={"dpi": "300"}) as diag:
    with Cluster("Source"):
        source = Server("HTTP")
    with Cluster("Data load"):
        storage = S3("Data storage")
        download = ECS("ECS download task")
        unzip_trigger = Lambda("S3 event trigger")
        unzip = ECS("ECS unzip task")
    with Cluster("Data processing"):
        parse = Glue("Glue Spark XML\nparser")
        catalog = GlueDataCatalog("Data Catalog")
        with Cluster("Feature engineering"):
            train_features = Glue("Glue Spark job:\ntrain features")
            predict_features = Glue("Glue Spark job:\nprediction features")
        prediction_db = Dynamodb("Prediction database")
    with Cluster("ML model"):
        cluster = EMRCluster("EMR Cluster")
        model_fit = Spark("Spark + MLeap")
        model = Sagemaker("Sagemaker endpoint")
    with Cluster("Serving"):
        app = Internet("Web app")
        api = APIGateway("REST API")
        predict_trigger = Lambda("Request/response\nhandler")
    user = User("End user")
Example #5
from diagrams import Cluster, Diagram, Edge
from diagrams.aws.analytics import (Athena, Glue, GlueCrawlers,
                                    GlueDataCatalog, Quicksight)
from diagrams.aws.storage import S3

with Diagram("demo-diagram", show=True):

    with Cluster('ETL - csv to parquet with data-type fine tune'):
        raw_data = S3('raw nyc-tlc-data')
        elt = Glue('Glue Job')
        parquet_data = S3('Parquet')
        raw_data >> elt >> parquet_data

    with Cluster('Exploratory Data Analysis'):
        crawler = GlueCrawlers('Crawler')
        catalog = GlueDataCatalog('DataCatalog')
        eda = Athena('Athena')
    parquet_data >> crawler >> catalog >> eda

    with Cluster('ETL - Filter outliers'):
        etl = Glue('Glue Job')
        clear_data = S3('DataCube')
        crawler = GlueCrawlers('Crawler')
        catalog = GlueDataCatalog('DataCatalog')
        engine = Athena('Athena')
        dashboard = Quicksight('Dashboard')
    parquet_data >> etl >> clear_data
    clear_data >> crawler >> catalog >> engine >> dashboard

    insight = Edge(label='Insight', style="dashed")
    eda >> insight >> etl
Example #6
            with Cluster("Private Subnets") as priv1:
                Blank("")
                singer1 = Singer("Singer.io\nExtract/Loads\n(ECS Fargate)")
                Blank("")
                Blank("")
                Blank("")
                # with Cluster("Private Subnet (2)") as priv2:
                tableau = Tableau("Tableau Server\n(EC2)")

        with Cluster("S3 Data Lake"):
            s3data = storage.S3("Data Bucket")
            s3meta = storage.S3("Metadata Bucket")
            s3logs = storage.S3("Logging Bucket")
            sftp = TransferForSftp("SFTP\nTransfer Service")
            py_fn1 = compute.Lambda("File Listener\n(Lambda Python)")
            glue = Glue("Spark Transforms\n(Glue)")

        # with Cluster("AWS Serverless"):
        events = Eventbridge("Event Triggers\n(AWS Eventbridge)")
        secrets = security.SecretsManager("AWS Secrets\nManager")
        cw = Cloudwatch("Cloudwatch Logs")

    source = Internet("External\nData Source")

    py_fn1 << s3data << py_fn1
    glue << s3data << glue

    nat << singer1
    nat >> source
    elb >> tableau
    s3meta >> singer1 >> s3data