def build_user_data(bosslet_config):
    """Populate the UserData configuration consumed by the cachemanager instance."""
    names = bosslet_config.names
    session = bosslet_config.session

    user_data = UserData()
    user_data["system"]["fqdn"] = names.cachemanager.dns
    user_data["system"]["type"] = "cachemanager"
    user_data["aws"]["cache"] = names.cache.redis
    user_data["aws"]["cache-state"] = names.cache_state.redis
    user_data["aws"]["cache-db"] = "0"
    user_data["aws"]["cache-state-db"] = "0"
    user_data["aws"]["s3-flush-queue"] = aws.sqs_lookup_url(session, names.s3flush.sqs)
    user_data["aws"]["s3-flush-deadletter-queue"] = aws.sqs_lookup_url(session, names.deadletter.sqs)
    user_data["aws"]["cuboid_bucket"] = names.cuboid_bucket.s3
    user_data["aws"]["ingest_bucket"] = names.ingest_bucket.s3
    user_data["aws"]["s3-index-table"] = names.s3_index.ddb
    user_data["aws"]["id-index-table"] = names.id_index.ddb
    user_data["aws"]["id-count-table"] = names.id_count_index.ddb
    #user_data["aws"]["sns-write-locked"] = str(Ref('WriteLock'))

    mailing_list_arn = aws.sns_topic_lookup(session, bosslet_config.ALERT_TOPIC)
    if mailing_list_arn is None:
        raise MissingResourceError('SNS topic', bosslet_config.ALERT_TOPIC)
    user_data["aws"]["sns-write-locked"] = mailing_list_arn

    user_data["lambda"]["flush_function"] = names.multi_lambda.lambda_
    user_data["lambda"]["page_in_function"] = names.multi_lambda.lambda_

    return user_data
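# Illustration only: a minimal sketch of the section/key layout that build_user_data()
# fills in.  The real UserData class comes from the surrounding library; configparser is
# used here as a stand-in, assuming an INI-style rendering, and every value below is a
# hypothetical placeholder rather than a name produced by bosslet_config.names.
import configparser
import io

sketch = configparser.ConfigParser()
sketch["system"] = {"fqdn": "cachemanager.example.internal", "type": "cachemanager"}
sketch["aws"] = {"cache": "cache.example.internal", "cache-db": "0"}
sketch["lambda"] = {"flush_function": "multiLambda-example"}

buf = io.StringIO()
sketch.write(buf)
print(buf.getvalue())  # prints the [system]/[aws]/[lambda] sections the instance would read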
def create_billing_alarms(session):
    """Create escalating billing alarms that notify the billing SNS topic."""
    print("creating billing alarms")
    billing_topic_arn = aws.sns_topic_lookup(session, const.PRODUCTION_BILLING_TOPIC)
    client = session.client("cloudwatch")

    alarm_parms = {
        'AlarmName': 'Billing_1k',
        'AlarmDescription': 'Alarm when spending reaches 1k',
        'ActionsEnabled': True,
        'OKActions': [],
        'AlarmActions': [billing_topic_arn],
        'InsufficientDataActions': [],
        'MetricName': 'EstimatedCharges',
        'Namespace': 'AWS/Billing',
        'Statistic': 'Maximum',
        'Dimensions': [{'Name': 'Currency', 'Value': 'USD'}],
        'Period': 21600,
        'EvaluationPeriods': 1,
        'Threshold': 1000.0,
        'ComparisonOperator': 'GreaterThanOrEqualToThreshold'
    }

    for num in range(1, MAX_ALARM_DOLLAR + 1):
        print(" {}k".format(str(num)))
        alarm_parms['AlarmName'] = "Billing_{}k".format(str(num))
        alarm_parms['AlarmDescription'] = "Alarm when spending reaches {}k".format(str(num))
        alarm_parms['Threshold'] = float(num * 1000)
        response = client.put_metric_alarm(**alarm_parms)
def create_billing_alarms(session):
    """Create escalating billing alarms that notify the billing SNS topic."""
    print("creating billing alarms")
    billing_topic_arn = aws.sns_topic_lookup(session, const.PRODUCTION_BILLING_TOPIC)
    client = session.client("cloudwatch")

    alarm_parms = {
        'AlarmName': 'Billing_1k',
        'AlarmDescription': 'Alarm when spending reaches 1k',
        'ActionsEnabled': True,
        'OKActions': [],
        'AlarmActions': [billing_topic_arn],
        'InsufficientDataActions': [],
        'MetricName': 'EstimatedCharges',
        'Namespace': 'AWS/Billing',
        'Statistic': 'Maximum',
        'Dimensions': [{'Name': 'Currency', 'Value': 'USD'}],
        'Period': 21600,  # EstimatedCharges only updates a few times a day, so use a 6 hour period
        'EvaluationPeriods': 1,
        'Threshold': 1000.0,
        'ComparisonOperator': 'GreaterThanOrEqualToThreshold'
    }

    for num in range(1, const.MAX_ALARM_DOLLAR + 1):
        print(" {}k".format(str(num)))
        alarm_parms['AlarmName'] = "Billing_{}k".format(str(num))
        alarm_parms['AlarmDescription'] = "Alarm when spending reaches {}k".format(str(num))
        alarm_parms['Threshold'] = float(num * 1000)
        response = client.put_metric_alarm(**alarm_parms)
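# Illustration only: the (name, threshold) pairs the loop above registers, assuming a
# hypothetical alarm ceiling of 5 (the real value comes from MAX_ALARM_DOLLAR /
# const.MAX_ALARM_DOLLAR in the surrounding module).
MAX_ALARM_DOLLAR_EXAMPLE = 5

for num in range(1, MAX_ALARM_DOLLAR_EXAMPLE + 1):
    name = "Billing_{}k".format(num)
    threshold = float(num * 1000)
    print(name, threshold)
# Billing_1k 1000.0 ... Billing_5k 5000.0 -- one CloudWatch alarm per $1k step, all
# pointing at the same billing SNS topic.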
def create(session, domain):
    """Create the configuration, and launch it"""
    names = AWSNames(domain)

    user_data = UserData()
    user_data["system"]["fqdn"] = names.cache_manager
    user_data["system"]["type"] = "cachemanager"
    user_data["aws"]["cache"] = names.cache
    user_data["aws"]["cache-state"] = names.cache_state
    user_data["aws"]["cache-db"] = "0"
    user_data["aws"]["cache-state-db"] = "0"
    user_data["aws"]["s3-flush-queue"] = aws.sqs_lookup_url(session, names.s3flush_queue)
    user_data["aws"]["s3-flush-deadletter-queue"] = aws.sqs_lookup_url(session, names.deadletter_queue)
    user_data["aws"]["cuboid_bucket"] = names.cuboid_bucket
    user_data["aws"]["ingest_bucket"] = names.ingest_bucket
    user_data["aws"]["s3-index-table"] = names.s3_index
    user_data["aws"]["id-index-table"] = names.id_index
    user_data["aws"]["id-count-table"] = names.id_count_index
    #user_data["aws"]["sns-write-locked"] = str(Ref('WriteLock'))

    mailing_list_arn = aws.sns_topic_lookup(session, const.PRODUCTION_MAILING_LIST)
    if mailing_list_arn is None:
        msg = "MailingList {} needs to be created before running config".format(const.PRODUCTION_MAILING_LIST)
        raise Exception(msg)
    user_data["aws"]["sns-write-locked"] = mailing_list_arn

    user_data["lambda"]["flush_function"] = names.multi_lambda
    user_data["lambda"]["page_in_function"] = names.multi_lambda

    keypair = aws.keypair_lookup(session)

    try:
        pre_init(session, domain)

        config = create_config(session, domain, keypair, user_data)

        success = config.create(session)
        if not success:
            raise Exception("Create Failed")
        else:
            post_init(session, domain)
    except:
        # DP NOTE: This will catch errors from pre_init, create, and post_init
        print("Error detected")
        raise
def create_config(bosslet_config, lookup=True):
    """Create the CloudFormationConfiguration object."""
    config = CloudFormationConfiguration('activities', bosslet_config)
    names = bosslet_config.names
    keypair = bosslet_config.SSH_KEY
    session = bosslet_config.session

    vpc_id = config.find_vpc()
    sgs = aws.sg_lookup_all(session, vpc_id)
    internal_subnets, _ = config.find_all_subnets()
    internal_subnets_asg, _ = config.find_all_subnets(compatibility='asg')

    topic_arn = aws.sns_topic_lookup(session, bosslet_config.ALERT_TOPIC)
    if topic_arn is None:
        raise MissingResourceError('SNS topic', bosslet_config.ALERT_TOPIC)

    event_data = {
        "lambda-name": "delete_lambda",
        "db": names.endpoint_db.rds,
        "meta-db": names.meta.ddb,
        "s3-index-table": names.s3_index.ddb,
        "id-index-table": names.id_index.ddb,
        "id-count-table": names.id_count_index.ddb,
        "cuboid_bucket": names.cuboid_bucket.s3,
        "delete_bucket": names.delete_bucket.s3,
        "topic-arn": topic_arn,
        "query-deletes-sfn-name": names.query_deletes.sfn,
        "delete-sfn-name": names.delete_cuboid.sfn,
        "delete-exp-sfn-name": names.delete_experiment.sfn,
        "delete-coord-frame-sfn-name": names.delete_coord_frame.sfn,
        "delete-coll-sfn-name": names.delete_collection.sfn,
    }

    role_arn = aws.role_arn_lookup(session, "events_for_delete_lambda")
    multi_lambda = names.multi_lambda.lambda_
    if lookup:
        lambda_arn = aws.lambda_arn_lookup(session, multi_lambda)
    else:
        lambda_arn = None
    target_list = [{
        "Arn": lambda_arn,
        "Id": multi_lambda,
        "Input": json.dumps(event_data)
    }]
    schedule_expression = "cron(1 6-11/1 ? * TUE-FRI *)"
    #schedule_expression = "cron(0/2 * * * ? *)"  # testing fire every two minutes

    config.add_event_rule("DeleteEventRule",  # XXX What type for event rules?
                          names.delete_event_rule.dns,
                          role_arn=role_arn,
                          schedule_expression=schedule_expression,
                          target_list=target_list,
                          state='DISABLED')  # Disabled until new delete is finished.

    # Events have to be given permission to run lambda.
    config.add_lambda_permission('DeleteRulePerm',
                                 multi_lambda,
                                 principal='events.amazonaws.com',
                                 source=Arn('DeleteEventRule'))

    user_data = UserData()
    user_data["system"]["fqdn"] = names.activities.dns
    user_data["system"]["type"] = "activities"
    user_data["aws"]["db"] = names.endpoint_db.rds
    user_data["aws"]["cache"] = names.cache.redis
    user_data["aws"]["cache-state"] = names.cache_state.redis
    user_data["aws"]["cache-db"] = "0"
    user_data["aws"]["cache-state-db"] = "0"
    user_data["aws"]["meta-db"] = names.meta.ddb
    user_data["aws"]["cuboid_bucket"] = names.cuboid_bucket.s3
    user_data["aws"]["tile_bucket"] = names.tile_bucket.s3
    user_data["aws"]["ingest_bucket"] = names.ingest_bucket.s3
    user_data["aws"]["s3-index-table"] = names.s3_index.ddb
    user_data["aws"]["tile-index-table"] = names.tile_index.ddb
    user_data["aws"]["id-index-table"] = names.id_index.ddb
    user_data["aws"]["id-count-table"] = names.id_count_index.ddb
    user_data["aws"]["max_task_id_suffix"] = str(const.MAX_TASK_ID_SUFFIX)
    user_data["aws"]["tile_ingest_lambda"] = names.tile_ingest.lambda_
    user_data["aws"]["tile_uploaded_lambda"] = names.tile_uploaded.lambda_

    config.add_autoscale_group("Activities",
                               names.activities.dns,
                               aws.ami_lookup(bosslet_config, names.activities.ami),
                               keypair,
                               subnets=internal_subnets_asg,
                               type_=const.ACTIVITIES_TYPE,
                               security_groups=[sgs[names.internal.sg]],
                               user_data=str(user_data),
                               role=aws.instance_profile_arn_lookup(session, "activities"),
                               min=1,
                               max=1)

    config.add_lambda("IngestLambda",
                      names.ingest_lambda.lambda_,
                      aws.role_arn_lookup(session, 'IngestQueueUpload'),
                      const.INGEST_LAMBDA,
                      handler="index.handler",
                      timeout=60 * 5,
                      runtime='python3.6',
                      memory=3008)

    config.add_lambda_permission("IngestLambdaExecute", Ref("IngestLambda"))

    # Downsample / Resolution Hierarchy support
    lambda_role = aws.role_arn_lookup(session, "lambda_resolution_hierarchy")

    config.add_lambda("DownsampleVolumeLambda",
                      names.downsample_volume.lambda_,
                      lambda_role,
                      handler="downsample_volume.handler",
                      timeout=120,
                      memory=1024,
                      dlq=Ref('DownsampleDLQ'))

    start_sfn_lambda_role = aws.role_arn_lookup(session, 'StartStepFcnLambdaRole')
    config.add_lambda("startSfnLambda",
                      names.start_sfn.lambda_,
                      start_sfn_lambda_role,
                      handler="start_sfn_lambda.handler",
                      timeout=60,
                      memory=128)

    # This dead letter queue behavior uses a lambda to put failed lambda
    # executions into a DLQ created specifically for each downsample job.
    # There is a separate DLQ for each resolution.
    config.add_sns_topic("DownsampleDLQ",
                         names.downsample_dlq.sns,
                         names.downsample_dlq.sns,
                         [('lambda', Arn('DownsampleDLQLambda'))])

    config.add_lambda('DownsampleDLQLambda',
                      names.downsample_dlq.lambda_,
                      lambda_role,
                      const.DOWNSAMPLE_DLQ_LAMBDA,
                      handler='index.handler',
                      runtime='python3.7',
                      timeout=10)

    config.add_lambda_permission('DownsampleDLQLambdaExecute', Ref('DownsampleDLQLambda'))

    return config
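# Illustration only: the shape of the DeleteEventRule target once json.dumps(event_data)
# has run.  Every name and ARN below is a hypothetical placeholder; the real values come
# from bosslet_config.names and the lambda/SNS lookups above.
import json

event_data_example = {
    "lambda-name": "delete_lambda",
    "db": "endpoint-db.example.internal",
    "delete-sfn-name": "DeleteCuboid.example",
    "topic-arn": "arn:aws:sns:us-east-1:123456789012:example-alerts",
}

target_example = [{
    "Arn": "arn:aws:lambda:us-east-1:123456789012:function:multiLambda-example",
    "Id": "multiLambda-example",
    "Input": json.dumps(event_data_example),
}]

print(json.dumps(target_example, indent=2))
# CloudWatch Events passes the Input JSON to the lambda as its event, so the delete
# lambda sees the original event_data dict when the scheduled rule fires.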
def create_config(bosslet_config):
    """Create the CloudFormationConfiguration object.

    :arg bosslet_config Bosslet configuration object used to perform lookups
    """
    config = CloudFormationConfiguration('cloudwatch', bosslet_config)
    names = bosslet_config.names
    session = bosslet_config.session
    domain = bosslet_config.INTERNAL_DOMAIN

    vpc_id = config.find_vpc()
    lambda_subnets, _ = config.find_all_subnets(compatibility='lambda')
    internal_sg = aws.sg_lookup(session, vpc_id, names.internal.sg)

    loadbalancer_name = names.endpoint_elb.dns
    if not aws.lb_lookup(session, loadbalancer_name):
        raise MissingResourceError('ELB', loadbalancer_name)

    # TODO Test that MailingListTopic is working.
    production_mailing_list = bosslet_config.ALERT_TOPIC
    mailing_list_arn = aws.sns_topic_lookup(session, production_mailing_list)
    if mailing_list_arn is None:
        raise MissingResourceError('SNS topic', bosslet_config.ALERT_TOPIC)

    config.add_cloudwatch(loadbalancer_name, [mailing_list_arn])

    lambda_role = aws.role_arn_lookup(session, 'VaultConsulHealthChecker')
    config.add_arg(Arg.String('VaultConsulHealthChecker', lambda_role,
                              'IAM role for vault/consul health check'))

    config.add_lambda('VaultLambda',
                      names.vault_monitor.lambda_,
                      description='Check health of vault instances.',
                      timeout=30,
                      role=Ref('VaultConsulHealthChecker'),
                      security_groups=[internal_sg],
                      subnets=lambda_subnets,
                      handler='index.lambda_handler',
                      file=const.VAULT_LAMBDA)

    # Lambda input data
    json_str = json.dumps({
        'hostname': names.vault.dns,
    })

    config.add_cloudwatch_rule('VaultCheck',
                               name=names.vault_check.cw,
                               description='Check health of vault instances.',
                               targets=[
                                   {
                                       'Arn': Arn('VaultLambda'),
                                       'Id': names.vault_monitor.lambda_,
                                       'Input': json_str
                                   },
                               ],
                               schedule='rate(2 minutes)',
                               depends_on=['VaultLambda'])

    config.add_lambda_permission('VaultPerms',
                                 names.vault_monitor.lambda_,
                                 principal='events.amazonaws.com',
                                 source=Arn('VaultCheck'))

    return config
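# Illustration only: the scheduled VaultCheck rule passes json.dumps({'hostname': ...}) as
# the target Input, and the lambda receives the parsed JSON as its event.  The handler
# below is a hypothetical stand-in for the real vault monitor (const.VAULT_LAMBDA); only
# the event shape is taken from the config above.
import json

def lambda_handler(event, context):
    hostname = event['hostname']          # e.g. "vault.example.internal"
    print("would probe vault at", hostname)
    return {'checked': hostname}

rule_input = json.dumps({'hostname': 'vault.example.internal'})
print(lambda_handler(json.loads(rule_input), context=None))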
def create_config(session, domain):
    """Create the CloudFormationConfiguration object.

    :arg session used to perform lookups
    :arg domain DNS name of vpc
    """
    config = CloudFormationConfiguration('cloudwatch', domain)
    names = AWSNames(domain)

    vpc_id = config.find_vpc(session)
    lambda_subnets, _ = config.find_all_availability_zones(session, lambda_compatible_only=True)
    internal_sg = aws.sg_lookup(session, vpc_id, names.internal)

    loadbalancer_name = names.endpoint_elb
    if not aws.lb_lookup(session, loadbalancer_name):
        raise Exception("Invalid load balancer name: " + loadbalancer_name)

    # TODO Test that MailingListTopic is working.
    production_mailing_list = const.PRODUCTION_MAILING_LIST
    mailing_list_arn = aws.sns_topic_lookup(session, production_mailing_list)
    if mailing_list_arn is None:
        #config.add_sns_topic("topicList", production_mailing_list)
        msg = "MailingList {} needs to be created before running config"
        raise Exception(msg.format(const.PRODUCTION_MAILING_LIST))

    config.add_cloudwatch(loadbalancer_name, [mailing_list_arn])

    lambda_role = aws.role_arn_lookup(session, 'VaultConsulHealthChecker')
    config.add_arg(Arg.String('VaultConsulHealthChecker', lambda_role,
                              'IAM role for vault/consul health check.' + domain))

    config.add_lambda('VaultLambda',
                      names.vault_monitor,
                      description='Check health of vault instances.',
                      timeout=30,
                      role=Ref('VaultConsulHealthChecker'),
                      security_groups=[internal_sg],
                      subnets=lambda_subnets,
                      handler='index.lambda_handler',
                      file=const.VAULT_LAMBDA)

    config.add_lambda('ConsulLambda',
                      names.consul_monitor,
                      description='Check health of consul instances.',
                      timeout=30,
                      role=Ref('VaultConsulHealthChecker'),
                      security_groups=[internal_sg],
                      subnets=lambda_subnets,
                      handler='index.lambda_handler',
                      file=const.CONSUL_LAMBDA)

    # Lambda input data
    json_str = json.dumps({
        'vpc_id': vpc_id,
        'vpc_name': domain,
        'topic_arn': mailing_list_arn,
    })

    config.add_cloudwatch_rule('VaultConsulCheck',
                               name=names.vault_consul_check,
                               description='Check health of vault and consul instances.',
                               targets=[
                                   {
                                       'Arn': Arn('VaultLambda'),
                                       'Id': names.vault_monitor,
                                       'Input': json_str
                                   },
                                   {
                                       'Arn': Arn('ConsulLambda'),
                                       'Id': names.consul_monitor,
                                       'Input': json_str
                                   },
                               ],
                               schedule='rate(1 minute)',
                               depends_on=['VaultLambda', 'ConsulLambda'])

    config.add_lambda_permission('VaultPerms',
                                 names.vault_monitor,
                                 principal='events.amazonaws.com',
                                 source=Arn('VaultConsulCheck'))

    config.add_lambda_permission('ConsulPerms',
                                 names.consul_monitor,
                                 principal='events.amazonaws.com',
                                 source=Arn('VaultConsulCheck'))

    return config
def create_config(session, domain):
    """Create the CloudFormationConfiguration object."""
    config = CloudFormationConfiguration('activities', domain, const.REGION)
    names = AWSNames(domain)

    global keypair
    keypair = aws.keypair_lookup(session)

    vpc_id = config.find_vpc(session)
    sgs = aws.sg_lookup_all(session, vpc_id)
    internal_subnets, _ = config.find_all_availability_zones(session)
    internal_subnets_lambda, _ = config.find_all_availability_zones(session, lambda_compatible_only=True)
    topic_arn = aws.sns_topic_lookup(session, "ProductionMicronsMailingList")
    event_data = {
        "lambda-name": "delete_lambda",
        "db": names.endpoint_db,
        "meta-db": names.meta,
        "s3-index-table": names.s3_index,
        "id-index-table": names.id_index,
        "id-count-table": names.id_count_index,
        "cuboid_bucket": names.cuboid_bucket,
        "delete_bucket": names.delete_bucket,
        "topic-arn": topic_arn,
        "query-deletes-sfn-name": names.query_deletes,
        "delete-sfn-name": names.delete_cuboid,
        "delete-exp-sfn-name": names.delete_experiment,
        "delete-coord-frame-sfn-name": names.delete_coord_frame,
        "delete-coll-sfn-name": names.delete_collection
    }

    role_arn = aws.role_arn_lookup(session, "events_for_delete_lambda")
    multi_lambda = names.multi_lambda
    lambda_arn = aws.lambda_arn_lookup(session, multi_lambda)
    target_list = [{
        "Arn": lambda_arn,
        "Id": multi_lambda,
        "Input": json.dumps(event_data)
    }]
    schedule_expression = "cron(1 6-11/1 ? * TUE-FRI *)"
    #schedule_expression = "cron(0/2 * * * ? *)"  # testing fire every two minutes

    config.add_event_rule("DeleteEventRule",
                          names.delete_event_rule,
                          role_arn=role_arn,
                          schedule_expression=schedule_expression,
                          target_list=target_list,
                          description=None)

    # Events have to be given permission to run lambda.
    config.add_lambda_permission('DeleteRulePerm',
                                 multi_lambda,
                                 principal='events.amazonaws.com',
                                 source=Arn('DeleteEventRule'))

    user_data = UserData()
    user_data["system"]["fqdn"] = names.activities
    user_data["system"]["type"] = "activities"
    user_data["aws"]["db"] = names.endpoint_db
    user_data["aws"]["cache"] = names.cache
    user_data["aws"]["cache-state"] = names.cache_state
    user_data["aws"]["cache-db"] = "0"
    user_data["aws"]["cache-state-db"] = "0"
    user_data["aws"]["meta-db"] = names.meta
    user_data["aws"]["cuboid_bucket"] = names.cuboid_bucket
    user_data["aws"]["tile_bucket"] = names.tile_bucket
    user_data["aws"]["ingest_bucket"] = names.ingest_bucket
    user_data["aws"]["s3-index-table"] = names.s3_index
    user_data["aws"]["tile-index-table"] = names.tile_index
    user_data["aws"]["id-index-table"] = names.id_index
    user_data["aws"]["id-count-table"] = names.id_count_index

    config.add_autoscale_group("Activities",
                               names.activities,
                               aws.ami_lookup(session, 'activities.boss'),
                               keypair,
                               subnets=internal_subnets_lambda,
                               type_=const.ACTIVITIES_TYPE,
                               security_groups=[sgs[names.internal]],
                               user_data=str(user_data),
                               role=aws.instance_profile_arn_lookup(session, "activities"),
                               min=1,
                               max=1)

    config.add_lambda("IngestLambda",
                      names.ingest_lambda,
                      aws.role_arn_lookup(session, 'IngestQueueUpload'),
                      const.INGEST_LAMBDA,
                      handler="index.handler",
                      timeout=60 * 5)

    config.add_lambda_permission("IngestLambdaExecute", Ref("IngestLambda"))

    return config
def create_config(session, domain, keypair=None, db_config={}):
    """
    Create the CloudFormationConfiguration object.

    Args:
        session: amazon session object
        domain (string): domain of the stack being created
        keypair: keypair used by instances being created
        db_config (dict): information needed by rds

    Returns:
        the config for the Cloud Formation stack
    """
    names = AWSNames(domain)

    # Lookup IAM Role and SNS Topic ARNs for use later in the config
    endpoint_role_arn = aws.role_arn_lookup(session, "endpoint")
    cachemanager_role_arn = aws.role_arn_lookup(session, 'cachemanager')
    dns_arn = aws.sns_topic_lookup(session, names.dns.replace(".", "-"))
    if dns_arn is None:
        raise Exception("SNS topic named dns." + domain + " does not exist.")

    mailing_list_arn = aws.sns_topic_lookup(session, const.PRODUCTION_MAILING_LIST)
    if mailing_list_arn is None:
        msg = "MailingList {} needs to be created before running config".format(const.PRODUCTION_MAILING_LIST)
        raise Exception(msg)

    # Configure Vault and create the user data config that the endpoint will
    # use for connecting to Vault and the DB instance
    user_data = UserData()
    user_data["system"]["fqdn"] = names.endpoint
    user_data["system"]["type"] = "endpoint"
    user_data["aws"]["db"] = names.endpoint_db
    user_data["aws"]["cache"] = names.cache
    user_data["aws"]["cache-state"] = names.cache_state

    ## cache-db and cache-state-db need to be in user_data for lambda to access them.
    user_data["aws"]["cache-db"] = "0"
    user_data["aws"]["cache-state-db"] = "0"
    user_data["aws"]["meta-db"] = names.meta

    # Use CloudFormation's Ref function so that queues' URLs are placed into
    # the Boss config file.
    user_data["aws"]["s3-flush-queue"] = str(Ref(names.s3flush_queue)) # str(Ref("S3FlushQueue")) DP XXX
    user_data["aws"]["s3-flush-deadletter-queue"] = str(Ref(names.deadletter_queue)) #str(Ref("DeadLetterQueue")) DP XXX
    user_data["aws"]["cuboid_bucket"] = names.cuboid_bucket
    user_data["aws"]["tile_bucket"] = names.tile_bucket
    user_data["aws"]["ingest_bucket"] = names.ingest_bucket
    user_data["aws"]["s3-index-table"] = names.s3_index
    user_data["aws"]["tile-index-table"] = names.tile_index
    user_data["aws"]["id-index-table"] = names.id_index
    user_data["aws"]["id-count-table"] = names.id_count_index
    user_data["aws"]["prod_mailing_list"] = mailing_list_arn

    user_data["auth"]["OIDC_VERIFY_SSL"] = 'True'

    user_data["lambda"]["flush_function"] = names.multi_lambda
    user_data["lambda"]["page_in_function"] = names.multi_lambda
    user_data["lambda"]["ingest_function"] = names.multi_lambda

    user_data['sfn']['populate_upload_queue'] = names.ingest_queue_populate
    user_data['sfn']['upload_sfn'] = names.ingest_queue_upload
    user_data['sfn']['downsample_sfn'] = names.resolution_hierarchy

    # Prepare user data for parsing by CloudFormation.
    parsed_user_data = {"Fn::Join": ["", user_data.format_for_cloudformation()]}

    config = CloudFormationConfiguration('api', domain, const.REGION)

    vpc_id = config.find_vpc(session)
    az_subnets, external_subnets = config.find_all_availability_zones(session)
    az_subnets_lambda, external_subnets_lambda = config.find_all_availability_zones(session, lambda_compatible_only=True)
    sgs = aws.sg_lookup_all(session, vpc_id)

    # DP XXX: hack until we can get production updated correctly
    config.add_security_group('AllHTTPSSecurityGroup',
                              'https.' + domain,
                              [('tcp', '443', '443', '0.0.0.0/0')])
    sgs[names.https] = Ref('AllHTTPSSecurityGroup')

    # Create SQS queues and apply access control policies.
    #config.add_sqs_queue("DeadLetterQueue", names.deadletter_queue, 30, 20160) DP XXX
    config.add_sqs_queue(names.deadletter_queue, names.deadletter_queue, 30, 20160)

    max_receives = 3
    #config.add_sqs_queue("S3FlushQueue", DP XXX
    config.add_sqs_queue(names.s3flush_queue,
                         names.s3flush_queue,
                         30,
                         dead=(Arn(names.deadletter_queue), max_receives))

    config.add_sqs_policy("sqsEndpointPolicy", 'sqsEndpointPolicy', # DP XXX
                          [Ref(names.deadletter_queue), Ref(names.s3flush_queue)],
                          endpoint_role_arn)

    config.add_sqs_policy("sqsCachemgrPolicy", 'sqsCachemgrPolicy', # DP XXX
                          [Ref(names.deadletter_queue), Ref(names.s3flush_queue)],
                          cachemanager_role_arn)

    # Create the endpoint ASG, ELB, and RDS instance
    config.add_autoscale_group("Endpoint",
                               names.endpoint,
                               aws.ami_lookup(session, "endpoint.boss"),
                               keypair,
                               subnets=az_subnets_lambda,
                               type_=const.ENDPOINT_TYPE,
                               security_groups=[sgs[names.internal]],
                               user_data=parsed_user_data,
                               min=const.ENDPOINT_CLUSTER_MIN,
                               max=const.ENDPOINT_CLUSTER_MAX,
                               elb=Ref("EndpointLoadBalancer"),
                               notifications=dns_arn,
                               role=aws.instance_profile_arn_lookup(session, 'endpoint'),
                               health_check_grace_period=90,
                               detailed_monitoring=True,
                               depends_on=["EndpointLoadBalancer", "EndpointDB"])

    cert = aws.cert_arn_lookup(session, names.public_dns("api"))
    config.add_loadbalancer("EndpointLoadBalancer",
                            names.endpoint_elb,
                            [("443", "80", "HTTPS", cert)],
                            subnets=external_subnets_lambda,
                            security_groups=[sgs[names.internal], sgs[names.https]],
                            public=True)

    # Endpoint servers are not CPU bound typically, so react quickly to load
    config.add_autoscale_policy("EndpointScaleUp",
                                Ref("Endpoint"),
                                adjustments=[
                                    (0.0, 10, 1),  # 12% - 22% Utilization add 1 instance
                                    (10, None, 2)  # Above 22% Utilization add 2 instances
                                ],
                                alarms=[
                                    ("CPUUtilization", "Maximum", "GreaterThanThreshold", "12")
                                ],
                                period=1)

    config.add_autoscale_policy("EndpointScaleDown",
                                Ref("Endpoint"),
                                adjustments=[
                                    (None, 0.0, -1),  # Under 1.5% Utilization remove 1 instance
                                ],
                                alarms=[
                                    ("CPUUtilization", "Average", "LessThanThreshold", "1.5")
                                ],
                                period=50)

    config.add_rds_db("EndpointDB",
                      names.endpoint_db,
                      db_config.get("port"),
                      db_config.get("name"),
                      db_config.get("user"),
                      db_config.get("password"),
                      az_subnets,
                      type_=const.RDS_TYPE,
                      security_groups=[sgs[names.internal]])

    # Create the Meta, s3Index, tileIndex, annotation Dynamo tables
    with open(const.DYNAMO_METADATA_SCHEMA, 'r') as fh:
        dynamo_cfg = json.load(fh)
    config.add_dynamo_table_from_json("EndpointMetaDB", names.meta, **dynamo_cfg)

    with open(const.DYNAMO_S3_INDEX_SCHEMA, 'r') as s3fh:
        dynamo_s3_cfg = json.load(s3fh)
    config.add_dynamo_table_from_json('s3Index', names.s3_index, **dynamo_s3_cfg)  # DP XXX

    with open(const.DYNAMO_TILE_INDEX_SCHEMA, 'r') as tilefh:
        dynamo_tile_cfg = json.load(tilefh)
    config.add_dynamo_table_from_json('tileIndex', names.tile_index, **dynamo_tile_cfg)  # DP XXX

    with open(const.DYNAMO_ID_INDEX_SCHEMA, 'r') as id_ind_fh:
        dynamo_id_ind__cfg = json.load(id_ind_fh)
    config.add_dynamo_table_from_json('idIndIndex', names.id_index, **dynamo_id_ind__cfg)  # DP XXX

    with open(const.DYNAMO_ID_COUNT_SCHEMA, 'r') as id_count_fh:
        dynamo_id_count_cfg = json.load(id_count_fh)
    config.add_dynamo_table_from_json('idCountIndex', names.id_count_index, **dynamo_id_count_cfg)  # DP XXX

    return config
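# Illustration only: how the (lower, upper, change) tuples in the EndpointScaleUp policy
# behave as step-scaling adjustments.  The bounds are assumed to be offsets from the 12%
# CPU alarm threshold (standard AWS step-adjustment semantics); the helper below is a
# stand-in for explanation, not the project's add_autoscale_policy implementation.
ALARM_THRESHOLD = 12.0
ADJUSTMENTS = [(0.0, 10, 1), (10, None, 2)]   # same tuples as EndpointScaleUp

def instances_to_add(cpu_percent):
    for lower, upper, change in ADJUSTMENTS:
        low = ALARM_THRESHOLD + lower
        high = float('inf') if upper is None else ALARM_THRESHOLD + upper
        if low <= cpu_percent < high:
            return change
    return 0

for cpu in (15, 30):
    print(cpu, "% CPU ->", instances_to_add(cpu), "instance(s) added")
# 15% falls in the 12-22% band (add 1); 30% is above 22% (add 2).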
def create_config(bosslet_config, db_config={}):
    """Create the CloudFormationConfiguration object for the api stack."""
    names = bosslet_config.names
    session = bosslet_config.session

    # Lookup IAM Role and SNS Topic ARNs for use later in the config
    endpoint_role_arn = aws.role_arn_lookup(session, "endpoint")
    cachemanager_role_arn = aws.role_arn_lookup(session, 'cachemanager')
    dns_arn = aws.sns_topic_lookup(session, names.dns.sns)
    if dns_arn is None:
        raise MissingResourceError('SNS topic', names.dns.sns)

    mailing_list_arn = aws.sns_topic_lookup(session, bosslet_config.ALERT_TOPIC)
    if mailing_list_arn is None:
        raise MissingResourceError('SNS topic', bosslet_config.ALERT_TOPIC)

    # Configure Vault and create the user data config that the endpoint will
    # use for connecting to Vault and the DB instance
    user_data = UserData()
    user_data["system"]["fqdn"] = names.endpoint.dns
    user_data["system"]["type"] = "endpoint"
    user_data["aws"]["db"] = names.endpoint_db.rds
    user_data["aws"]["cache"] = names.cache.redis
    user_data["aws"]["cache-state"] = names.cache_state.redis
    if const.REDIS_SESSION_TYPE is not None:
        user_data["aws"]["cache-session"] = names.cache_session.redis
    else:
        # Don't create a Redis server for dev stacks.
        user_data["aws"]["cache-session"] = ''
    if const.REDIS_THROTTLE_TYPE is not None:
        user_data["aws"]["cache-throttle"] = names.cache_throttle.redis
    else:
        user_data["aws"]["cache-throttle"] = ''

    ## cache-db and cache-state-db need to be in user_data for lambda to access them.
    user_data["aws"]["cache-db"] = "0"
    user_data["aws"]["cache-state-db"] = "0"
    user_data["aws"]["cache-throttle-db"] = "0"
    user_data["aws"]["cache-session-db"] = "0"
    user_data["aws"]["meta-db"] = names.meta.ddb

    # Use CloudFormation's Ref function so that queues' URLs are placed into
    # the Boss config file.
    user_data["aws"]["s3-flush-queue"] = str(Ref(names.s3flush.sqs)) # str(Ref("S3FlushQueue")) DP XXX
    user_data["aws"]["s3-flush-deadletter-queue"] = str(Ref(names.deadletter.sqs)) #str(Ref("DeadLetterQueue")) DP XXX
    user_data["aws"]["cuboid_bucket"] = names.cuboid_bucket.s3
    user_data["aws"]["tile_bucket"] = names.tile_bucket.s3
    user_data["aws"]["ingest_bucket"] = names.ingest_bucket.s3
    user_data["aws"]["s3-index-table"] = names.s3_index.ddb
    user_data["aws"]["tile-index-table"] = names.tile_index.ddb
    user_data["aws"]["id-index-table"] = names.id_index.ddb
    user_data["aws"]["id-count-table"] = names.id_count_index.ddb
    user_data["aws"]["prod_mailing_list"] = mailing_list_arn
    user_data["aws"]["max_task_id_suffix"] = str(const.MAX_TASK_ID_SUFFIX)
    user_data["aws"]["id-index-new-chunk-threshold"] = str(const.DYNAMO_ID_INDEX_NEW_CHUNK_THRESHOLD)
    user_data["aws"]["index-deadletter-queue"] = str(Ref(names.index_deadletter.sqs))
    user_data["aws"]["index-cuboids-keys-queue"] = str(Ref(names.index_cuboids_keys.sqs))

    user_data["auth"]["OIDC_VERIFY_SSL"] = str(bosslet_config.VERIFY_SSL)

    user_data["lambda"]["flush_function"] = names.multi_lambda.lambda_
    user_data["lambda"]["page_in_function"] = names.multi_lambda.lambda_
    user_data["lambda"]["ingest_function"] = names.tile_ingest.lambda_
    user_data["lambda"]["downsample_volume"] = names.downsample_volume.lambda_
    user_data["lambda"]["tile_uploaded_function"] = names.tile_uploaded.lambda_

    user_data['sfn']['populate_upload_queue'] = names.ingest_queue_populate.sfn
    user_data['sfn']['upload_sfn'] = names.ingest_queue_upload.sfn
    user_data['sfn']['volumetric_upload_sfn'] = names.volumetric_ingest_queue_upload.sfn
    user_data['sfn']['downsample_sfn'] = names.resolution_hierarchy.sfn
    user_data['sfn']['index_cuboid_supervisor_sfn'] = names.index_cuboid_supervisor.sfn

    # Prepare user data for parsing by CloudFormation.
    parsed_user_data = {"Fn::Join": ["", user_data.format_for_cloudformation()]}

    config = CloudFormationConfiguration('api', bosslet_config, version="2")
    keypair = bosslet_config.SSH_KEY

    vpc_id = config.find_vpc()
    internal_subnets, external_subnets = config.find_all_subnets()
    az_subnets_asg, external_subnets_asg = config.find_all_subnets(compatibility='asg')
    sgs = aws.sg_lookup_all(session, vpc_id)

    # DP XXX: hack until we can get production updated correctly
    config.add_security_group('AllHttpHttpsSecurityGroup',
                              names.https.sg,
                              [('tcp', '443', '443', bosslet_config.HTTPS_INBOUND),
                               ('tcp', '80', '80', bosslet_config.HTTPS_INBOUND)])
    sgs[names.https.sg] = Ref('AllHttpHttpsSecurityGroup')

    # Create SQS queues and apply access control policies.

    # Deadletter queue for indexing operations.  This one is populated
    # manually by states in the indexing step functions.
    config.add_sqs_queue(names.index_deadletter.sqs, names.index_deadletter.sqs, 30, 20160)

    # Queue that holds S3 object keys of cuboids to be indexed.
    config.add_sqs_queue(names.index_cuboids_keys.sqs, names.index_cuboids_keys.sqs, 120, 20160)

    #config.add_sqs_queue("DeadLetterQueue", names.deadletter.sqs, 30, 20160) DP XXX
    config.add_sqs_queue(names.deadletter.sqs, names.deadletter.sqs, 30, 20160)

    max_receives = 3
    #config.add_sqs_queue("S3FlushQueue", DP XXX
    config.add_sqs_queue(names.s3flush.sqs,
                         names.s3flush.sqs,
                         30,
                         dead=(Arn(names.deadletter.sqs), max_receives))

    config.add_sqs_policy("sqsEndpointPolicy", 'sqsEndpointPolicy', # DP XXX
                          [Ref(names.deadletter.sqs), Ref(names.s3flush.sqs)],
                          endpoint_role_arn)

    config.add_sqs_policy("sqsCachemgrPolicy", 'sqsCachemgrPolicy', # DP XXX
                          [Ref(names.deadletter.sqs), Ref(names.s3flush.sqs)],
                          cachemanager_role_arn)

    # Create the endpoint ASG, ELB, and RDS instance
    cert = aws.cert_arn_lookup(session, names.public_dns("api"))
    target_group_keys = config.add_app_loadbalancer("EndpointAppLoadBalancer",
                                                    names.endpoint_elb.dns,
                                                    [("443", "80", "HTTPS", cert)],
                                                    vpc_id=vpc_id,
                                                    subnets=external_subnets_asg,
                                                    security_groups=[sgs[names.internal.sg], sgs[names.https.sg]],
                                                    public=True)
    target_group_arns = [Ref(key) for key in target_group_keys]

    config.add_public_dns('EndpointAppLoadBalancer', names.public_dns('api'))

    config.add_autoscale_group("Endpoint",
                               names.endpoint.dns,
                               aws.ami_lookup(bosslet_config, names.endpoint.ami),
                               keypair,
                               subnets=az_subnets_asg,
                               type_=const.ENDPOINT_TYPE,
                               security_groups=[sgs[names.internal.sg]],
                               user_data=parsed_user_data,
                               min=const.ENDPOINT_CLUSTER_MIN,
                               max=const.ENDPOINT_CLUSTER_MAX,
                               notifications=dns_arn,
                               role=aws.instance_profile_arn_lookup(session, 'endpoint'),
                               health_check_grace_period=90,
                               detailed_monitoring=True,
                               target_group_arns=target_group_arns,
                               depends_on=["EndpointDB"])

    # Endpoint servers are not CPU bound typically, so react quickly to load
    config.add_autoscale_policy("EndpointScaleUp",
                                Ref("Endpoint"),
                                adjustments=[
                                    (0.0, 10, 1),  # 12% - 22% Utilization add 1 instance
                                    (10, None, 2)  # Above 22% Utilization add 2 instances
                                ],
                                alarms=[("CPUUtilization", "Maximum", "GreaterThanThreshold", "12")],
                                period=1)

    config.add_autoscale_policy("EndpointScaleDown",
                                Ref("Endpoint"),
                                adjustments=[
                                    (None, 0.0, -1),  # Under 1.5% Utilization remove 1 instance
                                ],
                                alarms=[("CPUUtilization", "Average", "LessThanThreshold", "1.5")],
                                period=50)

    config.add_rds_db("EndpointDB",
                      names.endpoint_db.dns,
                      db_config.get("port"),
                      db_config.get("name"),
                      db_config.get("user"),
                      db_config.get("password"),
                      internal_subnets,
                      type_=const.RDS_TYPE,
                      security_groups=[sgs[names.internal.sg]])

    # Create the Meta, s3Index, tileIndex, annotation Dynamo tables
    with open(const.DYNAMO_METADATA_SCHEMA, 'r') as fh:
        dynamo_cfg = json.load(fh)
    config.add_dynamo_table_from_json("EndpointMetaDB", names.meta.ddb, **dynamo_cfg)

    with open(const.DYNAMO_S3_INDEX_SCHEMA, 'r') as s3fh:
        dynamo_s3_cfg = json.load(s3fh)
    config.add_dynamo_table_from_json('s3Index', names.s3_index.ddb, **dynamo_s3_cfg)  # DP XXX

    with open(const.DYNAMO_TILE_INDEX_SCHEMA, 'r') as tilefh:
        dynamo_tile_cfg = json.load(tilefh)
    config.add_dynamo_table_from_json('tileIndex', names.tile_index.ddb, **dynamo_tile_cfg)  # DP XXX

    with open(const.DYNAMO_ID_INDEX_SCHEMA, 'r') as id_ind_fh:
        dynamo_id_ind__cfg = json.load(id_ind_fh)
    config.add_dynamo_table_from_json('idIndIndex', names.id_index.ddb, **dynamo_id_ind__cfg)  # DP XXX

    with open(const.DYNAMO_ID_COUNT_SCHEMA, 'r') as id_count_fh:
        dynamo_id_count_cfg = json.load(id_count_fh)
    config.add_dynamo_table_from_json('idCountIndex', names.id_count_index.ddb, **dynamo_id_count_cfg)  # DP XXX

    return config
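# Illustration only: the dead=(Arn(...), max_receives) argument used for the S3 flush
# queue expresses an SQS redrive policy.  In raw SQS terms that is a RedrivePolicy queue
# attribute like the one built below; the ARN and account are hypothetical placeholders,
# and this sketch only prints the attribute dict rather than calling AWS.
import json

dead_letter_arn = "arn:aws:sqs:us-east-1:123456789012:example-deadletter"
max_receives = 3

attributes = {
    "VisibilityTimeout": "30",
    "RedrivePolicy": json.dumps({
        "deadLetterTargetArn": dead_letter_arn,
        "maxReceiveCount": max_receives,   # after 3 failed receives a message moves to the DLQ
    }),
}
print(json.dumps(attributes, indent=2))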
def create_config(session, domain):
    """Create the CloudFormationConfiguration object."""
    config = CloudFormationConfiguration('activities', domain, const.REGION)
    names = AWSNames(domain)

    global keypair
    keypair = aws.keypair_lookup(session)

    vpc_id = config.find_vpc(session)
    sgs = aws.sg_lookup_all(session, vpc_id)
    internal_subnets, _ = config.find_all_availability_zones(session)
    internal_subnets_lambda, _ = config.find_all_availability_zones(session, lambda_compatible_only=True)
    topic_arn = aws.sns_topic_lookup(session, "ProductionMicronsMailingList")
    event_data = {
        "lambda-name": "delete_lambda",
        "db": names.endpoint_db,
        "meta-db": names.meta,
        "s3-index-table": names.s3_index,
        "id-index-table": names.id_index,
        "id-count-table": names.id_count_index,
        "cuboid_bucket": names.cuboid_bucket,
        "delete_bucket": names.delete_bucket,
        "topic-arn": topic_arn,
        "query-deletes-sfn-name": names.query_deletes,
        "delete-sfn-name": names.delete_cuboid,
        "delete-exp-sfn-name": names.delete_experiment,
        "delete-coord-frame-sfn-name": names.delete_coord_frame,
        "delete-coll-sfn-name": names.delete_collection
    }

    role_arn = aws.role_arn_lookup(session, "events_for_delete_lambda")
    multi_lambda = names.multi_lambda
    lambda_arn = aws.lambda_arn_lookup(session, multi_lambda)
    target_list = [{
        "Arn": lambda_arn,
        "Id": multi_lambda,
        "Input": json.dumps(event_data)
    }]
    schedule_expression = "cron(1 6-11/1 ? * TUE-FRI *)"
    #schedule_expression = "cron(0/2 * * * ? *)"  # testing fire every two minutes

    config.add_event_rule("DeleteEventRule",
                          names.delete_event_rule,
                          role_arn=role_arn,
                          schedule_expression=schedule_expression,
                          target_list=target_list,
                          description=None)

    # Events have to be given permission to run lambda.
    config.add_lambda_permission('DeleteRulePerm',
                                 multi_lambda,
                                 principal='events.amazonaws.com',
                                 source=Arn('DeleteEventRule'))

    user_data = UserData()
    user_data["system"]["fqdn"] = names.activities
    user_data["system"]["type"] = "activities"
    user_data["aws"]["db"] = names.endpoint_db
    user_data["aws"]["cache"] = names.cache
    user_data["aws"]["cache-state"] = names.cache_state
    user_data["aws"]["cache-db"] = "0"
    user_data["aws"]["cache-state-db"] = "0"
    user_data["aws"]["meta-db"] = names.meta
    user_data["aws"]["cuboid_bucket"] = names.cuboid_bucket
    user_data["aws"]["tile_bucket"] = names.tile_bucket
    user_data["aws"]["ingest_bucket"] = names.ingest_bucket
    user_data["aws"]["s3-index-table"] = names.s3_index
    user_data["aws"]["tile-index-table"] = names.tile_index
    user_data["aws"]["id-index-table"] = names.id_index
    user_data["aws"]["id-count-table"] = names.id_count_index
    user_data["aws"]["max_task_id_suffix"] = str(const.MAX_TASK_ID_SUFFIX)

    config.add_autoscale_group("Activities",
                               names.activities,
                               aws.ami_lookup(session, 'activities.boss'),
                               keypair,
                               subnets=internal_subnets_lambda,
                               type_=const.ACTIVITIES_TYPE,
                               security_groups=[sgs[names.internal]],
                               user_data=str(user_data),
                               role=aws.instance_profile_arn_lookup(session, "activities"),
                               min=1,
                               max=1)

    config.add_lambda("IngestLambda",
                      names.ingest_lambda,
                      aws.role_arn_lookup(session, 'IngestQueueUpload'),
                      const.INGEST_LAMBDA,
                      handler="index.handler",
                      timeout=60 * 5,
                      memory=3008)

    config.add_lambda_permission("IngestLambdaExecute", Ref("IngestLambda"))

    # Downsample / Resolution Hierarchy support
    lambda_role = aws.role_arn_lookup(session, "lambda_resolution_hierarchy")

    config.add_lambda("DownsampleVolumeLambda",
                      names.downsample_volume_lambda,
                      lambda_role,
                      s3=(aws.get_lambda_s3_bucket(session),
                          "multilambda.{}.zip".format(domain),
                          "downsample_volume.handler"),
                      timeout=120,
                      memory=1024,
                      runtime='python3.6',
                      dlq=Ref('DownsampleDLQ'))

    config.add_sns_topic("DownsampleDLQ",
                         names.downsample_dlq,
                         names.downsample_dlq,
                         [('lambda', Arn('DownsampleDLQLambda'))])

    config.add_lambda('DownsampleDLQLambda',
                      names.downsample_dlq,
                      lambda_role,
                      const.DOWNSAMPLE_DLQ_LAMBDA,
                      handler='index.handler',
                      timeout=10)

    config.add_lambda_permission('DownsampleDLQLambdaExecute', Ref('DownsampleDLQLambda'))

    return config