Example #1
0
def create_config(bosslet_config, lookup=True):
    """Assemble the CloudFormation configuration for the 'activities' stack.

    Args:
        bosslet_config: Bosslet configuration object. Its ``names``,
            ``SSH_KEY``, ``session`` and ``ALERT_TOPIC`` attributes are read.
        lookup (bool): When true the multi lambda's ARN is looked up and used
            as the delete event rule's target ARN; otherwise the target ARN
            is None.

    Returns:
        The populated CloudFormationConfiguration object.

    Raises:
        MissingResourceError: If the alert SNS topic lookup returns None.
    """
    config = CloudFormationConfiguration('activities', bosslet_config)
    names = bosslet_config.names
    ssh_key = bosslet_config.SSH_KEY
    session = bosslet_config.session

    security_groups = aws.sg_lookup_all(session, config.find_vpc())
    # The result of the default subnet lookup is not used below; only the
    # ASG compatible subnets are handed to the autoscale group.
    _, _ = config.find_all_subnets()
    asg_subnets, _ = config.find_all_subnets(compatibility='asg')

    alert_topic_arn = aws.sns_topic_lookup(session, bosslet_config.ALERT_TOPIC)
    if alert_topic_arn is None:
        raise MissingResourceError('SNS topic', bosslet_config.ALERT_TOPIC)

    # ------------------------------------------------------------------
    # Scheduled delete event, targeting the multi lambda
    # ------------------------------------------------------------------
    # Payload handed to the lambda as the event rule's "Input".
    delete_event_payload = dict([
        ("lambda-name", "delete_lambda"),
        ("db", names.endpoint_db.rds),
        ("meta-db", names.meta.ddb),
        ("s3-index-table", names.s3_index.ddb),
        ("id-index-table", names.id_index.ddb),
        ("id-count-table", names.id_count_index.ddb),
        ("cuboid_bucket", names.cuboid_bucket.s3),
        ("delete_bucket", names.delete_bucket.s3),
        ("topic-arn", alert_topic_arn),
        ("query-deletes-sfn-name", names.query_deletes.sfn),
        ("delete-sfn-name", names.delete_cuboid.sfn),
        ("delete-exp-sfn-name", names.delete_experiment.sfn),
        ("delete-coord-frame-sfn-name", names.delete_coord_frame.sfn),
        ("delete-coll-sfn-name", names.delete_collection.sfn),
    ])

    events_role_arn = aws.role_arn_lookup(session, "events_for_delete_lambda")
    multi_lambda = names.multi_lambda.lambda_
    multi_lambda_arn = aws.lambda_arn_lookup(session, multi_lambda) if lookup else None
    delete_target = {
        "Arn": multi_lambda_arn,
        "Id": multi_lambda,
        "Input": json.dumps(delete_event_payload),
    }

    # For testing, "cron(0/2 * * * ? *)" fires every two minutes.
    delete_schedule = "cron(1 6-11/1 ? * TUE-FRI *)"

    config.add_event_rule(
        "DeleteEventRule",
        # XXX What type for event rules?
        names.delete_event_rule.dns,
        role_arn=events_role_arn,
        schedule_expression=delete_schedule,
        target_list=[delete_target],
        # Disabled until new delete is finished.
        state='DISABLED',
    )

    # The events service needs explicit permission to invoke the lambda.
    config.add_lambda_permission(
        'DeleteRulePerm',
        multi_lambda,
        principal='events.amazonaws.com',
        source=Arn('DeleteEventRule'),
    )

    # ------------------------------------------------------------------
    # Activities autoscale group
    # ------------------------------------------------------------------
    # (section, option, value) entries written into the instance user data.
    user_data_entries = (
        ("system", "fqdn", names.activities.dns),
        ("system", "type", "activities"),
        ("aws", "db", names.endpoint_db.rds),
        ("aws", "cache", names.cache.redis),
        ("aws", "cache-state", names.cache_state.redis),
        ("aws", "cache-db", "0"),
        ("aws", "cache-state-db", "0"),
        ("aws", "meta-db", names.meta.ddb),
        ("aws", "cuboid_bucket", names.cuboid_bucket.s3),
        ("aws", "tile_bucket", names.tile_bucket.s3),
        ("aws", "ingest_bucket", names.ingest_bucket.s3),
        ("aws", "s3-index-table", names.s3_index.ddb),
        ("aws", "tile-index-table", names.tile_index.ddb),
        ("aws", "id-index-table", names.id_index.ddb),
        ("aws", "id-count-table", names.id_count_index.ddb),
        ("aws", "max_task_id_suffix", str(const.MAX_TASK_ID_SUFFIX)),
        ("aws", "tile_ingest_lambda", names.tile_ingest.lambda_),
        ("aws", "tile_uploaded_lambda", names.tile_uploaded.lambda_),
    )
    user_data = UserData()
    for section, option, value in user_data_entries:
        user_data[section][option] = value

    activities_ami = aws.ami_lookup(bosslet_config, names.activities.ami)
    internal_sg = security_groups[names.internal.sg]
    rendered_user_data = str(user_data)
    activities_profile = aws.instance_profile_arn_lookup(session, "activities")
    config.add_autoscale_group(
        "Activities",
        names.activities.dns,
        activities_ami,
        ssh_key,
        subnets=asg_subnets,
        type_=const.ACTIVITIES_TYPE,
        security_groups=[internal_sg],
        user_data=rendered_user_data,
        role=activities_profile,
        min=1,
        max=1,
    )

    # ------------------------------------------------------------------
    # Ingest lambda
    # ------------------------------------------------------------------
    ingest_role = aws.role_arn_lookup(session, 'IngestQueueUpload')
    config.add_lambda(
        "IngestLambda",
        names.ingest_lambda.lambda_,
        ingest_role,
        const.INGEST_LAMBDA,
        handler="index.handler",
        timeout=60 * 5,
        runtime='python3.6',
        memory=3008,
    )
    config.add_lambda_permission("IngestLambdaExecute", Ref("IngestLambda"))

    # ------------------------------------------------------------------
    # Downsample / Resolution Hierarchy support
    # ------------------------------------------------------------------
    resolution_role = aws.role_arn_lookup(session, "lambda_resolution_hierarchy")
    config.add_lambda(
        "DownsampleVolumeLambda",
        names.downsample_volume.lambda_,
        resolution_role,
        handler="downsample_volume.handler",
        timeout=120,
        memory=1024,
        dlq=Ref('DownsampleDLQ'),
    )

    start_sfn_role = aws.role_arn_lookup(session, 'StartStepFcnLambdaRole')
    config.add_lambda(
        "startSfnLambda",
        names.start_sfn.lambda_,
        start_sfn_role,
        handler="start_sfn_lambda.handler",
        timeout=60,
        memory=128,
    )

    # Dead letter handling: an SNS topic forwards failed lambda executions
    # to a lambda, which puts them into a DLQ created specifically for each
    # downsample job. There is a separate DLQ for each resolution.
    dlq_subscriptions = [('lambda', Arn('DownsampleDLQLambda'))]
    config.add_sns_topic(
        "DownsampleDLQ",
        names.downsample_dlq.sns,
        names.downsample_dlq.sns,
        dlq_subscriptions,
    )

    config.add_lambda(
        'DownsampleDLQLambda',
        names.downsample_dlq.lambda_,
        resolution_role,
        const.DOWNSAMPLE_DLQ_LAMBDA,
        handler='index.handler',
        runtime='python3.7',
        timeout=10,
    )
    config.add_lambda_permission('DownsampleDLQLambdaExecute',
                                 Ref('DownsampleDLQLambda'))

    return config
Example #2
0
def create_config(bosslet_config):
    """Assemble the CloudFormation configuration for the 'core' stack.

    Args:
        bosslet_config: Bosslet configuration object. Its ``session``,
            ``SSH_KEY``, ``names``, ``NETWORK``, ``AUTH_RDS``, ``SSH_INBOUND``
            and ``HTTPS_INBOUND`` attributes are read.

    Returns:
        The populated CloudFormationConfiguration object.
    """
    config = CloudFormationConfiguration('core', bosslet_config, version="2")
    session = bosslet_config.session
    ssh_key = bosslet_config.SSH_KEY
    names = bosslet_config.names

    config.add_vpc()

    # ------------------------------------------------------------------
    # Internal and external subnets
    # ------------------------------------------------------------------
    config.add_subnet('InternalSubnet', names.internal.subnet)
    config.add_subnet('ExternalSubnet', names.external.subnet)
    internal_subnets, external_subnets = config.add_all_subnets()
    internal_subnets_asg, external_subnets_asg = config.find_all_subnets('asg')

    # ------------------------------------------------------------------
    # Custom resource that helps delete ENIs left behind by lambdas
    # ------------------------------------------------------------------
    # DP NOTE: After deleting a lambda the ENIs may stick around for a while,
    #          causing the stack delete to fail.
    #          See https://stackoverflow.com/a/41310289
    config.add_arg(Arg.String('StackName', config.stack_name))
    config.add_custom_resource(
        'DeleteENI',
        'DeleteENI',
        Arn('DeleteENILambda'),
        StackName=Ref('StackName'),
    )
    delete_eni_role = aws.role_arn_lookup(session, 'DeleteENI')
    # 3 minute timeout, so that there is enough time to wait for the ENI
    # detach to complete. If the lambda times out CF will retry a couple
    # of times.
    config.add_lambda(
        "DeleteENILambda",
        names.delete_eni.lambda_,
        delete_eni_role,
        const.DELETE_ENI_LAMBDA,
        handler="index.handler",
        timeout=180,
        runtime='python3.6',
    )

    # ------------------------------------------------------------------
    # Bastion instance
    # ------------------------------------------------------------------
    bastion_user_data = const.BASTION_USER_DATA.format(bosslet_config.NETWORK)
    bastion_ami = aws.ami_lookup(bosslet_config, const.BASTION_AMI)
    bastion_sgs = [Ref("InternalSecurityGroup"), Ref("BastionSecurityGroup")]
    config.add_ec2_instance(
        "Bastion",
        names.bastion.dns,
        bastion_ami,
        ssh_key,
        subnet=Ref("ExternalSubnet"),
        public_ip=True,
        user_data=bastion_user_data,
        security_groups=bastion_sgs,
        depends_on="AttachInternetGateway",
    )

    # ------------------------------------------------------------------
    # Vault: KMS key, Dynamo table and autoscale group
    # ------------------------------------------------------------------
    vault_role = aws.role_arn_lookup(session, 'apl-vault')
    config.add_kms_key(
        "VaultKey",
        names.vault.key,
        vault_role,
        ['kms:Encrypt', 'kms:Decrypt', 'kms:DescribeKey'],
    )

    config.add_dynamo_table(
        "VaultTable",
        names.vault.ddb,
        attributes=[('Path', 'S'), ('Key', 'S')],
        key_schema=[('Path', 'HASH'), ('Key', 'RANGE')],
        throughput=(5, 5),
    )

    vault_user_data = UserData()
    vault_user_data["system"]["fqdn"] = names.vault.dns
    vault_user_data["system"]["type"] = "vault"
    vault_user_data["vault"]["kms_key"] = str(Ref("VaultKey"))
    vault_user_data["vault"]["ddb_table"] = names.vault.ddb
    # Wrapped in Fn::Join so CloudFormation assembles the final text.
    vault_user_data_joined = {
        "Fn::Join": ["", vault_user_data.format_for_cloudformation()]
    }

    vault_ami = aws.ami_lookup(bosslet_config, names.vault.ami)
    vault_profile = aws.instance_profile_arn_lookup(session, 'apl-vault')
    vault_dependencies = [
        "VaultKey",
        "VaultTable",
        "DNSLambda",
        "DNSSNS",
        "DNSLambdaExecute",
    ]
    config.add_autoscale_group(
        "Vault",
        names.vault.dns,
        vault_ami,
        ssh_key,
        subnets=internal_subnets_asg,
        type_=const.VAULT_TYPE,
        security_groups=[Ref("InternalSecurityGroup")],
        user_data=vault_user_data_joined,
        min=const.VAULT_CLUSTER_SIZE,
        max=const.VAULT_CLUSTER_SIZE,
        notifications=Ref("DNSSNS"),
        role=vault_profile,
        depends_on=vault_dependencies,
    )

    # ------------------------------------------------------------------
    # Auth: autoscale group + load balancer, public DNS, optional RDS
    # ------------------------------------------------------------------
    auth_user_data = UserData()
    auth_user_data["system"]["fqdn"] = names.auth.dns
    auth_user_data["system"]["type"] = "auth"
    auth_dependencies = [
        "AuthSecurityGroup",
        "AttachInternetGateway",
        "DNSLambda",
        "DNSSNS",
        "DNSLambdaExecute",
    ]

    # Problem: If development scenario uses a local DB. If the auth server
    #          crashes and is auto restarted by the autoscale group then the
    #          new auth server will not have any of the previous
    #          configuration, because the old DB was lost. Using an RDS for
    #          development fixes this at the cost of having the core config
    #          taking longer to launch.
    use_auth_rds = bosslet_config.AUTH_RDS
    if use_auth_rds:
        auth_dependencies.append("AuthDB")
        # flag for init script for which config to use
        auth_user_data["aws"]["db"] = "keycloak"

    auth_cert = aws.cert_arn_lookup(session, names.public_dns('auth'))
    auth_ami = aws.ami_lookup(bosslet_config, names.auth.ami)
    auth_listeners = [("443", "8080", "HTTPS", auth_cert)]
    create_asg_elb(
        config,
        "Auth",
        names.auth.dns,
        auth_ami,
        ssh_key,
        str(auth_user_data),
        const.AUTH_CLUSTER_SIZE,
        internal_subnets_asg,
        external_subnets_asg,
        auth_listeners,
        "HTTP:8080/index.html",
        sgs=[Ref("AuthSecurityGroup")],
        type_=const.AUTH_TYPE,
        depends_on=auth_dependencies,
    )
    config.add_public_dns('AuthLoadBalancer', names.public_dns('auth'))

    if use_auth_rds:
        config.add_rds_db(
            "AuthDB",
            names.auth_db.rds,
            "3306",
            "keycloak",
            "keycloak",
            "keycloak",
            internal_subnets,
            type_="db.t2.micro",
            security_groups=[Ref("InternalSecurityGroup")],
        )

    # ------------------------------------------------------------------
    # DNS lambda and the SNS topic that feeds it
    # ------------------------------------------------------------------
    dns_role = aws.role_arn_lookup(session, 'UpdateRoute53')
    config.add_lambda(
        "DNSLambda",
        names.dns.lambda_,
        dns_role,
        const.DNS_LAMBDA,
        handler="index.handler",
        timeout=10,
        depends_on="DNSZone",
    )
    config.add_lambda_permission("DNSLambdaExecute", Ref("DNSLambda"))
    config.add_sns_topic(
        "DNSSNS",
        names.dns.sns,
        names.dns.sns,
        [("lambda", Arn("DNSLambda"))],
    )

    # ------------------------------------------------------------------
    # Security groups
    # ------------------------------------------------------------------
    internal_rules = [("-1", "-1", "-1", bosslet_config.NETWORK)]
    config.add_security_group("InternalSecurityGroup",
                              names.internal.sg,
                              internal_rules)

    # SSH access to the bastion from the configured inbound range
    ssh_inbound = bosslet_config.SSH_INBOUND
    config.add_security_group("BastionSecurityGroup",
                              names.ssh.sg,
                              [("tcp", "22", "22", ssh_inbound)])

    https_inbound = bosslet_config.HTTPS_INBOUND
    # Allow requests from the endpoint via the NAT gateway.
    # Needed in case HTTPS_INBOUND doesn't include the gateway's IP.
    nat_cidr = {"Fn::Join": ["/", [Ref("NATIP"), "32"]]}
    auth_rules = [
        ("tcp", "443", "443", https_inbound),
        ("tcp", "443", "443", nat_cidr),
    ]
    # DP XXX: names.auth.sg is used instead of names.https.sg as a hack
    #         until we can get production updated correctly
    config.add_security_group("AuthSecurityGroup", names.auth.sg, auth_rules)

    # ------------------------------------------------------------------
    # Internal route table, routing traffic through the NAT
    # ------------------------------------------------------------------
    internal_rt_subnets = internal_subnets.copy()
    internal_rt_subnets.append(Ref("InternalSubnet"))
    config.add_route_table("InternalRouteTable",
                           names.internal.rt,
                           subnets=internal_rt_subnets)
    config.add_route_table_route(
        "InternalNatRoute",
        Ref("InternalRouteTable"),
        nat=Ref("NAT"),
        depends_on="NAT",
    )

    # ------------------------------------------------------------------
    # Internet gateway and internet route table
    # ------------------------------------------------------------------
    external_rt_subnets = external_subnets.copy()
    external_rt_subnets.append(Ref("ExternalSubnet"))
    config.add_route_table("InternetRouteTable",
                           names.internet.rt,
                           subnets=external_rt_subnets)
    config.add_route_table_route(
        "InternetRoute",
        Ref("InternetRouteTable"),
        gateway=Ref("InternetGateway"),
        depends_on="AttachInternetGateway",
    )

    config.add_internet_gateway("InternetGateway", names.internet.gw)

    # S3 and DynamoDB endpoints, each attached to both route tables
    config.add_endpoint(
        "S3Endpoint",
        "s3",
        [Ref("InternalRouteTable"), Ref('InternetRouteTable')],
    )
    config.add_endpoint(
        "DynamoDBEndpoint",
        "dynamodb",
        [Ref("InternalRouteTable"), Ref('InternetRouteTable')],
    )

    config.add_nat("NAT",
                   Ref("ExternalSubnet"),
                   depends_on="AttachInternetGateway")

    return config
Example #3
0
def create_config(session, domain):
    """Assemble the CloudFormation configuration for the 'activities' stack.

    As a side effect the module-level global ``keypair`` is set to the
    result of ``aws.keypair_lookup(session)``.

    Args:
        session: Session object passed to every AWS lookup made here.
        domain: Domain used to construct the configuration and AWSNames.

    Returns:
        The populated CloudFormationConfiguration object.
    """
    global keypair

    config = CloudFormationConfiguration('activities', domain, const.REGION)
    names = AWSNames(domain)
    keypair = aws.keypair_lookup(session)

    security_groups = aws.sg_lookup_all(session, config.find_vpc(session))
    # The unrestricted availability zone lookup result is not used below;
    # only the lambda compatible subnets are handed to the autoscale group.
    _, _ = config.find_all_availability_zones(session)
    lambda_subnets, _ = config.find_all_availability_zones(
        session, lambda_compatible_only=True)

    mailing_list_arn = aws.sns_topic_lookup(session, "ProductionMicronsMailingList")

    # ------------------------------------------------------------------
    # Scheduled delete event, targeting the multi lambda
    # ------------------------------------------------------------------
    # Payload handed to the lambda as the event rule's "Input".
    delete_event_payload = dict([
        ("lambda-name", "delete_lambda"),
        ("db", names.endpoint_db),
        ("meta-db", names.meta),
        ("s3-index-table", names.s3_index),
        ("id-index-table", names.id_index),
        ("id-count-table", names.id_count_index),
        ("cuboid_bucket", names.cuboid_bucket),
        ("delete_bucket", names.delete_bucket),
        ("topic-arn", mailing_list_arn),
        ("query-deletes-sfn-name", names.query_deletes),
        ("delete-sfn-name", names.delete_cuboid),
        ("delete-exp-sfn-name", names.delete_experiment),
        ("delete-coord-frame-sfn-name", names.delete_coord_frame),
        ("delete-coll-sfn-name", names.delete_collection),
    ])

    events_role_arn = aws.role_arn_lookup(session, "events_for_delete_lambda")
    multi_lambda = names.multi_lambda
    multi_lambda_arn = aws.lambda_arn_lookup(session, multi_lambda)
    delete_target = {
        "Arn": multi_lambda_arn,
        "Id": multi_lambda,
        "Input": json.dumps(delete_event_payload),
    }

    # For testing, "cron(0/2 * * * ? *)" fires every two minutes.
    delete_schedule = "cron(1 6-11/1 ? * TUE-FRI *)"

    config.add_event_rule(
        "DeleteEventRule",
        names.delete_event_rule,
        role_arn=events_role_arn,
        schedule_expression=delete_schedule,
        target_list=[delete_target],
        description=None,
    )

    # The events service needs explicit permission to invoke the lambda.
    config.add_lambda_permission(
        'DeleteRulePerm',
        multi_lambda,
        principal='events.amazonaws.com',
        source=Arn('DeleteEventRule'),
    )

    # ------------------------------------------------------------------
    # Activities autoscale group
    # ------------------------------------------------------------------
    # (section, option, value) entries written into the instance user data.
    user_data_entries = (
        ("system", "fqdn", names.activities),
        ("system", "type", "activities"),
        ("aws", "db", names.endpoint_db),
        ("aws", "cache", names.cache),
        ("aws", "cache-state", names.cache_state),
        ("aws", "cache-db", "0"),
        ("aws", "cache-state-db", "0"),
        ("aws", "meta-db", names.meta),
        ("aws", "cuboid_bucket", names.cuboid_bucket),
        ("aws", "tile_bucket", names.tile_bucket),
        ("aws", "ingest_bucket", names.ingest_bucket),
        ("aws", "s3-index-table", names.s3_index),
        ("aws", "tile-index-table", names.tile_index),
        ("aws", "id-index-table", names.id_index),
        ("aws", "id-count-table", names.id_count_index),
    )
    user_data = UserData()
    for section, option, value in user_data_entries:
        user_data[section][option] = value

    activities_ami = aws.ami_lookup(session, 'activities.boss')
    internal_sg = security_groups[names.internal]
    rendered_user_data = str(user_data)
    activities_profile = aws.instance_profile_arn_lookup(session, "activities")
    config.add_autoscale_group(
        "Activities",
        names.activities,
        activities_ami,
        keypair,
        subnets=lambda_subnets,
        type_=const.ACTIVITIES_TYPE,
        security_groups=[internal_sg],
        user_data=rendered_user_data,
        role=activities_profile,
        min=1,
        max=1,
    )

    # ------------------------------------------------------------------
    # Ingest lambda
    # ------------------------------------------------------------------
    ingest_role = aws.role_arn_lookup(session, 'IngestQueueUpload')
    config.add_lambda(
        "IngestLambda",
        names.ingest_lambda,
        ingest_role,
        const.INGEST_LAMBDA,
        handler="index.handler",
        timeout=60 * 5,
    )
    config.add_lambda_permission("IngestLambdaExecute", Ref("IngestLambda"))

    return config
Example #4
0
def create_config(session, domain, keypair=None, db_config=None):
    """
    Create the CloudFormationConfiguration object for the 'api' stack.

    Args:
        session: amazon session object
        domain (string): domain of the stack being created
        keypair: keypair used by the instances being created
        db_config (dict): information needed by rds; the "port", "name",
            "user" and "password" keys are read. When omitted (or None) an
            empty dict is used, so each of those values is None.

    Returns: the config for the Cloud Formation stack

    Raises:
        Exception: if the DNS SNS topic or the production mailing list SNS
            topic lookup returns None.
    """
    # A None sentinel replaces the former mutable default argument ({}),
    # which would have been shared between calls.
    if db_config is None:
        db_config = {}

    names = AWSNames(domain)

    # Lookup IAM Role and SNS Topic ARNs used later in the config
    endpoint_role_arn = aws.role_arn_lookup(session, "endpoint")
    cachemanager_role_arn = aws.role_arn_lookup(session, 'cachemanager')
    dns_arn = aws.sns_topic_lookup(session, names.dns.replace(".", "-"))
    if dns_arn is None:
        raise Exception("SNS topic named dns." + domain + " does not exist.")

    mailing_list_arn = aws.sns_topic_lookup(session, const.PRODUCTION_MAILING_LIST)
    if mailing_list_arn is None:
        msg = "MailingList {} needs to be created before running config".format(const.PRODUCTION_MAILING_LIST)
        raise Exception(msg)

    # Configure Vault and create the user data config that the endpoint will
    # use for connecting to Vault and the DB instance
    user_data = UserData()
    user_data["system"]["fqdn"] = names.endpoint
    user_data["system"]["type"] = "endpoint"
    user_data["aws"]["db"] = names.endpoint_db
    user_data["aws"]["cache"] = names.cache
    user_data["aws"]["cache-state"] = names.cache_state

    # cache-db and cache-state-db need to be in user_data for lambda to access them.
    user_data["aws"]["cache-db"] = "0"
    user_data["aws"]["cache-state-db"] = "0"
    user_data["aws"]["meta-db"] = names.meta

    # Use CloudFormation's Ref function so that queues' URLs are placed into
    # the Boss config file.
    # DP XXX: the queue names double as the resource keys, instead of
    #         "S3FlushQueue" / "DeadLetterQueue"
    user_data["aws"]["s3-flush-queue"] = str(Ref(names.s3flush_queue))
    user_data["aws"]["s3-flush-deadletter-queue"] = str(Ref(names.deadletter_queue))
    user_data["aws"]["cuboid_bucket"] = names.cuboid_bucket
    user_data["aws"]["tile_bucket"] = names.tile_bucket
    user_data["aws"]["ingest_bucket"] = names.ingest_bucket
    user_data["aws"]["s3-index-table"] = names.s3_index
    user_data["aws"]["tile-index-table"] = names.tile_index
    user_data["aws"]["id-index-table"] = names.id_index
    user_data["aws"]["id-count-table"] = names.id_count_index
    user_data["aws"]["prod_mailing_list"] = mailing_list_arn

    user_data["auth"]["OIDC_VERIFY_SSL"] = 'True'
    user_data["lambda"]["flush_function"] = names.multi_lambda
    user_data["lambda"]["page_in_function"] = names.multi_lambda
    user_data["lambda"]["ingest_function"] = names.multi_lambda

    user_data['sfn']['populate_upload_queue'] = names.ingest_queue_populate
    user_data['sfn']['upload_sfn'] = names.ingest_queue_upload
    user_data['sfn']['downsample_sfn'] = names.resolution_hierarchy

    # Prepare user data for parsing by CloudFormation.
    parsed_user_data = {"Fn::Join": ["", user_data.format_for_cloudformation()]}

    config = CloudFormationConfiguration('api', domain, const.REGION)

    vpc_id = config.find_vpc(session)
    # Only the internal subnets of the unrestricted lookup are used (by the
    # RDS instance); the ASG and load balancer use the lambda compatible ones.
    az_subnets, _ = config.find_all_availability_zones(session)
    az_subnets_lambda, external_subnets_lambda = config.find_all_availability_zones(session, lambda_compatible_only=True)
    sgs = aws.sg_lookup_all(session, vpc_id)

    # DP XXX: hack until we can get production updated correctly
    config.add_security_group('AllHTTPSSecurityGroup', 'https.' + domain, [('tcp', '443', '443', '0.0.0.0/0')])
    sgs[names.https] = Ref('AllHTTPSSecurityGroup')

    # Create SQS queues and apply access control policies.
    # DP XXX: keyed by queue name rather than "DeadLetterQueue" / "S3FlushQueue"
    config.add_sqs_queue(names.deadletter_queue, names.deadletter_queue, 30, 20160)

    max_receives = 3
    config.add_sqs_queue(names.s3flush_queue,
                         names.s3flush_queue,
                         30,
                         dead=(Arn(names.deadletter_queue), max_receives))

    config.add_sqs_policy("sqsEndpointPolicy", 'sqsEndpointPolicy',  # DP XXX
                          [Ref(names.deadletter_queue), Ref(names.s3flush_queue)],
                          endpoint_role_arn)

    config.add_sqs_policy("sqsCachemgrPolicy", 'sqsCachemgrPolicy',  # DP XXX
                          [Ref(names.deadletter_queue), Ref(names.s3flush_queue)],
                          cachemanager_role_arn)

    # Create the endpoint ASG, ELB, and RDS instance
    config.add_autoscale_group("Endpoint",
                               names.endpoint,
                               aws.ami_lookup(session, "endpoint.boss"),
                               keypair,
                               subnets=az_subnets_lambda,
                               type_=const.ENDPOINT_TYPE,
                               security_groups=[sgs[names.internal]],
                               user_data=parsed_user_data,
                               min=const.ENDPOINT_CLUSTER_MIN,
                               max=const.ENDPOINT_CLUSTER_MAX,
                               elb=Ref("EndpointLoadBalancer"),
                               notifications=dns_arn,
                               role=aws.instance_profile_arn_lookup(session, 'endpoint'),
                               health_check_grace_period=90,
                               detailed_monitoring=True,
                               depends_on=["EndpointLoadBalancer", "EndpointDB"])

    cert = aws.cert_arn_lookup(session, names.public_dns("api"))
    config.add_loadbalancer("EndpointLoadBalancer",
                            names.endpoint_elb,
                            [("443", "80", "HTTPS", cert)],
                            subnets=external_subnets_lambda,
                            security_groups=[sgs[names.internal], sgs[names.https]],
                            public=True)

    # Endpoint servers are not CPU bound typically, so react quickly to load
    config.add_autoscale_policy("EndpointScaleUp",
                                Ref("Endpoint"),
                                adjustments=[
                                    (0.0, 10, 1),  # 12% - 22% Utilization add 1 instance
                                    (10, None, 2)  # Above 22% Utilization add 2 instances
                                ],
                                alarms=[
                                    ("CPUUtilization", "Maximum", "GreaterThanThreshold", "12")
                                ],
                                period=1)

    config.add_autoscale_policy("EndpointScaleDown",
                                Ref("Endpoint"),
                                adjustments=[
                                    (None, 0.0, -1),   # Under 1.5% Utilization remove 1 instance
                                ],
                                alarms=[
                                    ("CPUUtilization", "Average", "LessThanThreshold", "1.5")
                                ],
                                period=50)

    config.add_rds_db("EndpointDB",
                      names.endpoint_db,
                      db_config.get("port"),
                      db_config.get("name"),
                      db_config.get("user"),
                      db_config.get("password"),
                      az_subnets,
                      type_=const.RDS_TYPE,
                      security_groups=[sgs[names.internal]])

    # Create the Meta, s3Index, tileIndex, idIndex and idCount Dynamo tables.
    # Each entry is (JSON schema path, resource key, table name); the schema
    # is loaded and expanded into keyword arguments for the table definition.
    dynamo_tables = [
        (const.DYNAMO_METADATA_SCHEMA, "EndpointMetaDB", names.meta),
        (const.DYNAMO_S3_INDEX_SCHEMA, 's3Index', names.s3_index),  # DP XXX
        (const.DYNAMO_TILE_INDEX_SCHEMA, 'tileIndex', names.tile_index),  # DP XXX
        (const.DYNAMO_ID_INDEX_SCHEMA, 'idIndIndex', names.id_index),  # DP XXX
        (const.DYNAMO_ID_COUNT_SCHEMA, 'idCountIndex', names.id_count_index),  # DP XXX
    ]
    for schema_path, table_key, table_name in dynamo_tables:
        with open(schema_path, 'r') as schema_fh:
            table_cfg = json.load(schema_fh)
        config.add_dynamo_table_from_json(table_key, table_name, **table_cfg)

    return config
Example #5
0
def create_config(session, domain):
    """Build the CloudFormation configuration for the 'core' stack.

    Declares, in order: the VPC and its subnets, the Bastion instance, the
    Consul and Vault autoscale groups, the Auth servers (through
    create_asg_elb), an RDS instance for Auth when the scenario asks for one,
    the DNS lambda with its permission and SNS topic, the security groups,
    the route tables, the internet gateway, the S3 / DynamoDB endpoints and
    the NAT.

    Side effects:
        Stores the looked-up key pair in the module-level ``keypair`` global.
        Reads the ``SCENARIO`` environment variable (KeyError if it is unset).

    Args:
        session: AWS session passed to the ``aws`` lookup helpers.
        domain: Domain the stack is being created for.

    Returns:
        The populated CloudFormationConfiguration object.
    """
    global keypair

    cfg = CloudFormationConfiguration('core', domain, const.REGION)
    names = AWSNames(domain)
    keypair = aws.keypair_lookup(session)

    cfg.add_vpc()

    # One internal and one external subnet, followed by the per-AZ subnets.
    for subnet_key, subnet_kind in (('InternalSubnet', 'internal'),
                                    ('ExternalSubnet', 'external')):
        cfg.add_subnet(subnet_key, names.subnet(subnet_kind))
    internal_subnets, external_subnets = cfg.add_all_azs(session)
    # It seems that both Lambdas and ASGs need lambda_compatible_only subnets.
    lambda_internal_subnets, lambda_external_subnets = cfg.add_all_azs(
        session, lambda_compatible_only=True)

    cfg.add_ec2_instance(
        "Bastion",
        names.bastion,
        aws.ami_lookup(session, const.BASTION_AMI),
        keypair,
        subnet=Ref("ExternalSubnet"),
        public_ip=True,
        user_data=const.BASTION_USER_DATA,
        security_groups=[Ref("InternalSecurityGroup"),
                         Ref("BastionSecurityGroup")],
        depends_on="AttachInternetGateway")

    # DNS resources that every autoscale group below depends on. Kept as a
    # tuple so each call receives its own fresh list.
    dns_deps = ("DNSLambda", "DNSSNS", "DNSLambdaExecute")

    consul_data = UserData()
    consul_data["system"]["fqdn"] = names.consul
    consul_data["system"]["type"] = "consul"
    consul_data["consul"]["cluster"] = str(
        get_scenario(const.CONSUL_CLUSTER_SIZE))
    cfg.add_autoscale_group(
        "Consul",
        names.consul,
        aws.ami_lookup(session, "consul.boss"),
        keypair,
        subnets=lambda_internal_subnets,
        security_groups=[Ref("InternalSecurityGroup")],
        user_data=str(consul_data),
        min=const.CONSUL_CLUSTER_SIZE,
        max=const.CONSUL_CLUSTER_SIZE,
        notifications=Ref("DNSSNS"),
        role=aws.instance_profile_arn_lookup(session, 'consul'),
        support_update=False,  # Update will restart the instances manually
        depends_on=list(dns_deps))

    vault_data = UserData()
    vault_data["system"]["fqdn"] = names.vault
    vault_data["system"]["type"] = "vault"
    cfg.add_autoscale_group(
        "Vault",
        names.vault,
        aws.ami_lookup(session, "vault.boss"),
        keypair,
        subnets=lambda_internal_subnets,
        security_groups=[Ref("InternalSecurityGroup")],
        user_data=str(vault_data),
        min=const.VAULT_CLUSTER_SIZE,
        max=const.VAULT_CLUSTER_SIZE,
        notifications=Ref("DNSSNS"),
        depends_on=["Consul"] + list(dns_deps))

    auth_data = UserData()
    auth_data["system"]["fqdn"] = names.auth
    auth_data["system"]["type"] = "auth"
    auth_deps = ["AuthSecurityGroup", "AttachInternetGateway"] + list(dns_deps)

    # Problem: a development scenario uses a local DB. If the auth server
    #          crashes and the autoscale group restarts it, the replacement
    #          has none of the previous configuration because the old DB is
    #          gone. Using an RDS fixes this, at the cost of the core config
    #          taking longer to launch.
    scenario = os.environ["SCENARIO"]
    use_db = scenario in ("production", "ha-development")
    if use_db:
        auth_deps.append("AuthDB")
        # Flag telling the init script which config to use.
        auth_data["aws"]["db"] = "keycloak"

    auth_cert = aws.cert_arn_lookup(session, names.public_dns('auth'))
    create_asg_elb(
        cfg,
        "Auth",
        names.auth,
        aws.ami_lookup(session, "auth.boss"),
        keypair,
        str(auth_data),
        const.AUTH_CLUSTER_SIZE,
        lambda_internal_subnets,
        lambda_external_subnets,
        [("443", "8080", "HTTPS", auth_cert)],
        "HTTP:8080/index.html",
        sgs=[Ref("AuthSecurityGroup")],
        type_=const.AUTH_TYPE,
        depends_on=auth_deps)

    if use_db:
        cfg.add_rds_db(
            "AuthDB",
            names.auth_db,
            "3306",
            "keycloak",
            "keycloak",
            "keycloak",
            internal_subnets,
            type_="db.t2.micro",
            security_groups=[Ref("InternalSecurityGroup")])

    cfg.add_lambda(
        "DNSLambda",
        names.dns,
        aws.role_arn_lookup(session, 'UpdateRoute53'),
        const.DNS_LAMBDA,
        handler="index.handler",
        timeout=10,
        depends_on="DNSZone")
    cfg.add_lambda_permission("DNSLambdaExecute", Ref("DNSLambda"))
    cfg.add_sns_topic(
        "DNSSNS",
        names.dns,
        names.dns,
        [("lambda", Arn("DNSLambda"))])

    cfg.add_security_group(
        "InternalSecurityGroup",
        names.internal,
        [("-1", "-1", "-1", "10.0.0.0/8")])

    # SSH access to the bastion is limited to const.INCOMING_SUBNET.
    cfg.add_security_group(
        "BastionSecurityGroup",
        names.ssh,
        [("tcp", "22", "22", const.INCOMING_SUBNET)])

    # DP XXX: names.auth instead of names.https is a hack until production
    #         can be updated correctly.
    cfg.add_security_group(
        "AuthSecurityGroup",
        names.auth,
        [("tcp", "443", "443", "0.0.0.0/0")])

    # Internal route table, routing traffic to the NAT.
    internal_route_subnets = internal_subnets.copy()
    internal_route_subnets.append(Ref("InternalSubnet"))
    cfg.add_route_table(
        "InternalRouteTable",
        names.internal,
        subnets=internal_route_subnets)
    cfg.add_route_table_route(
        "InternalNatRoute",
        Ref("InternalRouteTable"),
        nat=Ref("NAT"),
        depends_on="NAT")

    # Internet route table, routing traffic to the internet gateway.
    external_route_subnets = external_subnets.copy()
    external_route_subnets.append(Ref("ExternalSubnet"))
    cfg.add_route_table(
        "InternetRouteTable",
        names.internet,
        subnets=external_route_subnets)
    cfg.add_route_table_route(
        "InternetRoute",
        Ref("InternetRouteTable"),
        gateway=Ref("InternetGateway"),
        depends_on="AttachInternetGateway")

    cfg.add_internet_gateway("InternetGateway", names.internet)
    # Both service endpoints are attached to both route tables.
    for endpoint_key, service in (("S3Endpoint", "s3"),
                                  ("DynamoDBEndpoint", "dynamodb")):
        cfg.add_endpoint(
            endpoint_key,
            service,
            [Ref("InternalRouteTable"), Ref('InternetRouteTable')])
    cfg.add_nat("NAT", Ref("ExternalSubnet"), depends_on="AttachInternetGateway")

    return cfg
Example #6
0
def create_config(bosslet_config, db_config=None):
    """Build the CloudFormation configuration for the 'api' stack.

    Declares the endpoint security group, the SQS queues and their policies,
    the application load balancer with its public DNS entry, the Endpoint
    autoscale group with scale-up / scale-down policies, the EndpointDB RDS
    instance and the DynamoDB tables loaded from the JSON schema files named
    in ``const``.

    Args:
        bosslet_config: Bosslet configuration; provides ``names``,
            ``session``, ``ALERT_TOPIC``, ``VERIFY_SSL``, ``HTTPS_INBOUND``
            and ``SSH_KEY``.
        db_config: Optional mapping with the RDS settings. The keys "port",
            "name", "user" and "password" are read with ``.get()``, so a
            missing key is passed on as None. Defaults to an empty mapping.

    Returns:
        The populated CloudFormationConfiguration object.

    Raises:
        MissingResourceError: If the DNS SNS topic or the alert SNS topic
            cannot be found.
    """
    # A None sentinel replaces the former mutable ``{}`` default so that no
    # dict instance is shared between calls.
    if db_config is None:
        db_config = {}

    names = bosslet_config.names
    session = bosslet_config.session

    # Look up the IAM role and SNS topic ARNs used later in the config
    endpoint_role_arn = aws.role_arn_lookup(session, "endpoint")
    cachemanager_role_arn = aws.role_arn_lookup(session, 'cachemanager')
    dns_arn = aws.sns_topic_lookup(session, names.dns.sns)
    if dns_arn is None:
        raise MissingResourceError('SNS topic', names.dns.sns)

    mailing_list_arn = aws.sns_topic_lookup(session,
                                            bosslet_config.ALERT_TOPIC)
    if mailing_list_arn is None:
        raise MissingResourceError('SNS topic', bosslet_config.ALERT_TOPIC)

    # Configure Vault and create the user data config that the endpoint will
    # use for connecting to Vault and the DB instance
    user_data = UserData()
    user_data["system"]["fqdn"] = names.endpoint.dns
    user_data["system"]["type"] = "endpoint"
    user_data["aws"]["db"] = names.endpoint_db.rds
    user_data["aws"]["cache"] = names.cache.redis
    user_data["aws"]["cache-state"] = names.cache_state.redis
    if const.REDIS_SESSION_TYPE is not None:
        user_data["aws"]["cache-session"] = names.cache_session.redis
    else:
        # Don't create a Redis server for dev stacks.
        user_data["aws"]["cache-session"] = ''
    if const.REDIS_THROTTLE_TYPE is not None:
        user_data["aws"]["cache-throttle"] = names.cache_throttle.redis
    else:
        user_data["aws"]["cache-throttle"] = ''

    # cache-db and cache-state-db need to be in user_data for lambda to
    # access them.
    user_data["aws"]["cache-db"] = "0"
    user_data["aws"]["cache-state-db"] = "0"
    user_data["aws"]["cache-throttle-db"] = "0"
    user_data["aws"]["cache-session-db"] = "0"
    user_data["aws"]["meta-db"] = names.meta.ddb

    # Use CloudFormation's Ref function so that queues' URLs are placed into
    # the Boss config file.
    user_data["aws"]["s3-flush-queue"] = str(Ref(
        names.s3flush.sqs))  # str(Ref("S3FlushQueue")) DP XXX
    user_data["aws"]["s3-flush-deadletter-queue"] = str(
        Ref(names.deadletter.sqs))  # str(Ref("DeadLetterQueue")) DP XXX
    user_data["aws"]["cuboid_bucket"] = names.cuboid_bucket.s3
    user_data["aws"]["tile_bucket"] = names.tile_bucket.s3
    user_data["aws"]["ingest_bucket"] = names.ingest_bucket.s3
    user_data["aws"]["s3-index-table"] = names.s3_index.ddb
    user_data["aws"]["tile-index-table"] = names.tile_index.ddb
    user_data["aws"]["id-index-table"] = names.id_index.ddb
    user_data["aws"]["id-count-table"] = names.id_count_index.ddb
    user_data["aws"]["prod_mailing_list"] = mailing_list_arn
    user_data["aws"]["max_task_id_suffix"] = str(const.MAX_TASK_ID_SUFFIX)
    user_data["aws"]["id-index-new-chunk-threshold"] = str(
        const.DYNAMO_ID_INDEX_NEW_CHUNK_THRESHOLD)
    user_data["aws"]["index-deadletter-queue"] = str(
        Ref(names.index_deadletter.sqs))
    user_data["aws"]["index-cuboids-keys-queue"] = str(
        Ref(names.index_cuboids_keys.sqs))

    user_data["auth"]["OIDC_VERIFY_SSL"] = str(bosslet_config.VERIFY_SSL)
    user_data["lambda"]["flush_function"] = names.multi_lambda.lambda_
    user_data["lambda"]["page_in_function"] = names.multi_lambda.lambda_
    user_data["lambda"]["ingest_function"] = names.tile_ingest.lambda_
    user_data["lambda"]["downsample_volume"] = names.downsample_volume.lambda_
    user_data["lambda"]["tile_uploaded_function"] = names.tile_uploaded.lambda_

    user_data['sfn']['populate_upload_queue'] = names.ingest_queue_populate.sfn
    user_data['sfn']['upload_sfn'] = names.ingest_queue_upload.sfn
    user_data['sfn'][
        'volumetric_upload_sfn'] = names.volumetric_ingest_queue_upload.sfn
    user_data['sfn']['downsample_sfn'] = names.resolution_hierarchy.sfn
    user_data['sfn'][
        'index_cuboid_supervisor_sfn'] = names.index_cuboid_supervisor.sfn

    # Prepare user data for parsing by CloudFormation.
    parsed_user_data = {
        "Fn::Join": ["", user_data.format_for_cloudformation()]
    }

    config = CloudFormationConfiguration('api', bosslet_config, version="2")
    keypair = bosslet_config.SSH_KEY

    vpc_id = config.find_vpc()
    internal_subnets, external_subnets = config.find_all_subnets()
    az_subnets_asg, external_subnets_asg = config.find_all_subnets(
        compatibility='asg')
    sgs = aws.sg_lookup_all(session, vpc_id)

    # DP XXX: hack until we can get production updated correctly
    config.add_security_group(
        'AllHttpHttpsSecurityGroup', names.https.sg,
        [('tcp', '443', '443', bosslet_config.HTTPS_INBOUND),
         ('tcp', '80', '80', bosslet_config.HTTPS_INBOUND)])
    sgs[names.https.sg] = Ref('AllHttpHttpsSecurityGroup')

    # Create SQS queues and apply access control policies.
    # Deadletter queue for indexing operations.  This one is populated
    # manually by states in the indexing step functions.
    config.add_sqs_queue(names.index_deadletter.sqs,
                         names.index_deadletter.sqs, 30, 20160)

    # Queue that holds S3 object keys of cuboids to be indexed.
    config.add_sqs_queue(names.index_cuboids_keys.sqs,
                         names.index_cuboids_keys.sqs, 120, 20160)

    # config.add_sqs_queue("DeadLetterQueue", names.deadletter.sqs, 30, 20160) DP XXX
    config.add_sqs_queue(names.deadletter.sqs, names.deadletter.sqs, 30, 20160)

    max_receives = 3
    # config.add_sqs_queue("S3FlushQueue", DP XXX
    config.add_sqs_queue(names.s3flush.sqs,
                         names.s3flush.sqs,
                         30,
                         dead=(Arn(names.deadletter.sqs), max_receives))

    config.add_sqs_policy(
        "sqsEndpointPolicy",
        'sqsEndpointPolicy',  # DP XXX
        [Ref(names.deadletter.sqs),
         Ref(names.s3flush.sqs)],
        endpoint_role_arn)

    config.add_sqs_policy(
        "sqsCachemgrPolicy",
        'sqsCachemgrPolicy',  # DP XXX
        [Ref(names.deadletter.sqs),
         Ref(names.s3flush.sqs)],
        cachemanager_role_arn)

    # Create the endpoint ASG, ELB, and RDS instance

    cert = aws.cert_arn_lookup(session, names.public_dns("api"))
    target_group_keys = config.add_app_loadbalancer(
        "EndpointAppLoadBalancer",
        names.endpoint_elb.dns, [("443", "80", "HTTPS", cert)],
        vpc_id=vpc_id,
        subnets=external_subnets_asg,
        security_groups=[sgs[names.internal.sg], sgs[names.https.sg]],
        public=True)

    target_group_arns = [Ref(key) for key in target_group_keys]

    config.add_public_dns('EndpointAppLoadBalancer', names.public_dns('api'))
    config.add_autoscale_group("Endpoint",
                               names.endpoint.dns,
                               aws.ami_lookup(bosslet_config,
                                              names.endpoint.ami),
                               keypair,
                               subnets=az_subnets_asg,
                               type_=const.ENDPOINT_TYPE,
                               security_groups=[sgs[names.internal.sg]],
                               user_data=parsed_user_data,
                               min=const.ENDPOINT_CLUSTER_MIN,
                               max=const.ENDPOINT_CLUSTER_MAX,
                               notifications=dns_arn,
                               role=aws.instance_profile_arn_lookup(
                                   session, 'endpoint'),
                               health_check_grace_period=90,
                               detailed_monitoring=True,
                               target_group_arns=target_group_arns,
                               depends_on=["EndpointDB"])

    # Endpoint servers are not CPU bound typically, so react quickly to load
    config.add_autoscale_policy(
        "EndpointScaleUp",
        Ref("Endpoint"),
        adjustments=[
            (0.0, 10, 1),  # 12% - 22% Utilization add 1 instance
            (10, None, 2)  # Above 22% Utilization add 2 instances
        ],
        alarms=[("CPUUtilization", "Maximum", "GreaterThanThreshold", "12")],
        period=1)

    config.add_autoscale_policy(
        "EndpointScaleDown",
        Ref("Endpoint"),
        adjustments=[
            (None, 0.0, -1),  # Under 1.5% Utilization remove 1 instance
        ],
        alarms=[("CPUUtilization", "Average", "LessThanThreshold", "1.5")],
        period=50)

    config.add_rds_db("EndpointDB",
                      names.endpoint_db.dns,
                      db_config.get("port"),
                      db_config.get("name"),
                      db_config.get("user"),
                      db_config.get("password"),
                      internal_subnets,
                      type_=const.RDS_TYPE,
                      security_groups=[sgs[names.internal.sg]])

    # Create the Meta, s3Index, tileIndex, annotation Dynamo tables
    with open(const.DYNAMO_METADATA_SCHEMA, 'r') as fh:
        dynamo_cfg = json.load(fh)
    config.add_dynamo_table_from_json("EndpointMetaDB", names.meta.ddb,
                                      **dynamo_cfg)

    with open(const.DYNAMO_S3_INDEX_SCHEMA, 'r') as s3fh:
        dynamo_s3_cfg = json.load(s3fh)
    config.add_dynamo_table_from_json('s3Index', names.s3_index.ddb,
                                      **dynamo_s3_cfg)  # DP XXX

    with open(const.DYNAMO_TILE_INDEX_SCHEMA, 'r') as tilefh:
        dynamo_tile_cfg = json.load(tilefh)
    config.add_dynamo_table_from_json('tileIndex', names.tile_index.ddb,
                                      **dynamo_tile_cfg)  # DP XXX

    with open(const.DYNAMO_ID_INDEX_SCHEMA, 'r') as id_ind_fh:
        dynamo_id_ind__cfg = json.load(id_ind_fh)
    config.add_dynamo_table_from_json('idIndIndex', names.id_index.ddb,
                                      **dynamo_id_ind__cfg)  # DP XXX

    with open(const.DYNAMO_ID_COUNT_SCHEMA, 'r') as id_count_fh:
        dynamo_id_count_cfg = json.load(id_count_fh)
    config.add_dynamo_table_from_json('idCountIndex', names.id_count_index.ddb,
                                      **dynamo_id_count_cfg)  # DP XXX

    return config
Example #7
0
def create_config(session, domain):
    """Build the CloudFormation configuration for the 'activities' stack.

    Declares the scheduled delete event rule (targeting the multi lambda)
    with its lambda permission, the Activities autoscale group, the ingest
    lambda, and the downsample volume lambda together with its dead-letter
    SNS topic and dead-letter lambda.

    Side effects:
        Stores the looked-up key pair in the module-level ``keypair`` global.

    Args:
        session: AWS session passed to the ``aws`` lookup helpers.
        domain: Domain the stack is being created for.

    Returns:
        The populated CloudFormationConfiguration object.
    """
    global keypair

    cfg = CloudFormationConfiguration('activities', domain, const.REGION)
    names = AWSNames(domain)
    keypair = aws.keypair_lookup(session)

    vpc_id = cfg.find_vpc(session)
    security_groups = aws.sg_lookup_all(session, vpc_id)
    # The unfiltered subnet list is looked up but not used further below.
    internal_subnets, _ = cfg.find_all_availability_zones(session)
    lambda_subnets, _ = cfg.find_all_availability_zones(
        session, lambda_compatible_only=True)
    topic_arn = aws.sns_topic_lookup(session, "ProductionMicronsMailingList")

    # Payload handed to the multi lambda each time the rule fires; it is
    # serialized to JSON as the target's input.
    delete_event = {
        "lambda-name": "delete_lambda",
        "db": names.endpoint_db,
        "meta-db": names.meta,
        "s3-index-table": names.s3_index,
        "id-index-table": names.id_index,
        "id-count-table": names.id_count_index,
        "cuboid_bucket": names.cuboid_bucket,
        "delete_bucket": names.delete_bucket,
        "topic-arn": topic_arn,
        "query-deletes-sfn-name": names.query_deletes,
        "delete-sfn-name": names.delete_cuboid,
        "delete-exp-sfn-name": names.delete_experiment,
        "delete-coord-frame-sfn-name": names.delete_coord_frame,
        "delete-coll-sfn-name": names.delete_collection,
    }

    events_role_arn = aws.role_arn_lookup(session, "events_for_delete_lambda")
    multi_lambda = names.multi_lambda
    multi_lambda_arn = aws.lambda_arn_lookup(session, multi_lambda)
    rule_targets = [{
        "Arn": multi_lambda_arn,
        "Id": multi_lambda,
        "Input": json.dumps(delete_event),
    }]
    # For testing, "cron(0/2 * * * ? *)" fires every two minutes.
    delete_schedule = "cron(1 6-11/1 ? * TUE-FRI *)"

    cfg.add_event_rule(
        "DeleteEventRule",
        names.delete_event_rule,
        role_arn=events_role_arn,
        schedule_expression=delete_schedule,
        target_list=rule_targets,
        description=None)
    # Events have to be given permission to run the lambda.
    cfg.add_lambda_permission(
        'DeleteRulePerm',
        multi_lambda,
        principal='events.amazonaws.com',
        source=Arn('DeleteEventRule'))

    activities_data = UserData()
    activities_data["system"]["fqdn"] = names.activities
    activities_data["system"]["type"] = "activities"
    # Entries of the "aws" section, in the order they are written.
    aws_entries = (
        ("db", names.endpoint_db),
        ("cache", names.cache),
        ("cache-state", names.cache_state),
        ("cache-db", "0"),
        ("cache-state-db", "0"),
        ("meta-db", names.meta),
        ("cuboid_bucket", names.cuboid_bucket),
        ("tile_bucket", names.tile_bucket),
        ("ingest_bucket", names.ingest_bucket),
        ("s3-index-table", names.s3_index),
        ("tile-index-table", names.tile_index),
        ("id-index-table", names.id_index),
        ("id-count-table", names.id_count_index),
        ("max_task_id_suffix", str(const.MAX_TASK_ID_SUFFIX)),
    )
    for option, value in aws_entries:
        activities_data["aws"][option] = value

    cfg.add_autoscale_group(
        "Activities",
        names.activities,
        aws.ami_lookup(session, 'activities.boss'),
        keypair,
        subnets=lambda_subnets,
        type_=const.ACTIVITIES_TYPE,
        security_groups=[security_groups[names.internal]],
        user_data=str(activities_data),
        role=aws.instance_profile_arn_lookup(session, "activities"),
        min=1,
        max=1)

    cfg.add_lambda(
        "IngestLambda",
        names.ingest_lambda,
        aws.role_arn_lookup(session, 'IngestQueueUpload'),
        const.INGEST_LAMBDA,
        handler="index.handler",
        timeout=60 * 5,
        memory=3008)
    cfg.add_lambda_permission("IngestLambdaExecute", Ref("IngestLambda"))

    # Downsample / resolution hierarchy support. The same role is used for
    # the downsample lambda and for its dead-letter lambda.
    hierarchy_role = aws.role_arn_lookup(session, "lambda_resolution_hierarchy")

    cfg.add_lambda(
        "DownsampleVolumeLambda",
        names.downsample_volume_lambda,
        hierarchy_role,
        s3=(aws.get_lambda_s3_bucket(session),
            "multilambda.{}.zip".format(domain),
            "downsample_volume.handler"),
        timeout=120,
        memory=1024,
        runtime='python3.6',
        dlq=Ref('DownsampleDLQ'))

    cfg.add_sns_topic(
        "DownsampleDLQ",
        names.downsample_dlq,
        names.downsample_dlq,
        [('lambda', Arn('DownsampleDLQLambda'))])

    cfg.add_lambda(
        'DownsampleDLQLambda',
        names.downsample_dlq,
        hierarchy_role,
        const.DOWNSAMPLE_DLQ_LAMBDA,
        handler='index.handler',
        timeout=10)
    cfg.add_lambda_permission('DownsampleDLQLambdaExecute',
                              Ref('DownsampleDLQLambda'))

    return cfg