def load_event_definitions(self):
    """Reload the scheduler table and build the parsed event list.

    Re-reads the scheduler DynamoDB table, then turns every string-valued
    entry into a record appended to self.event_names, containing a unique
    CloudWatch rule name (derived from group/variant + content digest) and
    the parsed 'schedule' directive list. Unparseable entries are logged
    and skipped.
    """
    self.scheduler_table.reread_table()
    self.events = self.scheduler_table.get_dict()
    for name, spec in self.events.items():
        # Only plain string definitions are parseable; ignore anything else.
        if not isinstance(spec, str):
            continue
        # Digest makes the rule name unique per (event name, definition) pair.
        digest = misc.sha256("%s:%s" % (name, spec))
        event_name = "CS-Cron-%s%s-%s" % (self.context["GroupName"],
                                          self.context["VariantNumber"],
                                          digest[:10])
        try:
            self.event_names.append({
                "Name": event_name,
                "EventName": name,
                "Event": spec,
                "Data": misc.parse_line_as_list_of_dict(spec, leading_keyname="schedule")
            })
        except Exception as ex:
            # Best-effort: a malformed entry must not prevent loading the others.
            log.exception("Failed to parse Scheduler event '%s' (%s) : %s" % (name, spec, ex))
def DynamoDBParameters_CreateOrUpdate(data, AccountId=None, Region=None, DynamoDBConfiguration=None):
    """Fill *data* with billing/throughput parameters for every CloneSquad table.

    Each table defaults to PAY_PER_REQUEST (on-demand). If DynamoDBConfiguration
    names a table with a "read[:write]" capacity string, that table is switched
    to PROVISIONED mode with the parsed capacities (write defaults to read when
    only one number is supplied).

    Raises:
        ValueError: when a capacity specification cannot be parsed as integers.
            (Fix: now chained with ``from e`` so the original parse error is
            kept in the traceback; the bound exception was previously unused.)
    """
    config = misc.parse_line_as_list_of_dict(DynamoDBConfiguration, with_leading_string=False)
    TABLES = [
        "ConfigTable",
        "AlarmStateEC2Table",
        "EventTable",
        "LongTermEventTable",
        "SchedulerTable",
        "StateTable"
    ]
    for c in TABLES:
        # Defaults: on-demand billing, zeroed provisioned throughput.
        data["%s.BillingMode" % c] = "PAY_PER_REQUEST"
        data["%s.ProvisionedThroughput" % c] = {
            "ReadCapacityUnits": "0",
            "WriteCapacityUnits": "0"
        }
        if len(config) and c in config[0] and config[0][c] != "":
            try:
                capacity = config[0][c].split(":")
                read_capacity = capacity[0]
                # A single value means identical read and write capacity.
                write_capacity = capacity[1] if len(capacity) > 1 else capacity[0]
                data["%s.BillingMode" % c] = "PROVISIONED"
                data["%s.ProvisionedThroughput" % c] = {
                    "ReadCapacityUnits": str(int(read_capacity)),
                    "WriteCapacityUnits": str(int(write_capacity))
                }
            except Exception as e:
                # Chain the original exception so the root cause stays visible.
                raise ValueError(
                    "Failed to parse DynamoDBParameters keyword '%s' with value '%s'!"
                    % (c, config[0][c])) from e
def ApiGWParameters_CreateOrUpdate(data, AccountId=None, Region=None, Dummy=None,
                                   ApiGWConfiguration=None, ApiGWEndpointConfiguration=None,
                                   DefaultGWPolicyURL=None):
    """Fill *data* with API Gateway deployment parameters.

    Defaults: REGIONAL endpoint type and the policy fetched from
    DefaultGWPolicyURL. ApiGWConfiguration (parsed with leading key 'GWType')
    may override GWType, GWPolicy and VpcEndpointDNS; any other key raises.

    Raises:
        ValueError: on an unknown configuration key or an invalid GWType.

    Fix: the two consecutive ``if len(config):`` blocks operating on the same
    ``config[0]`` are merged into one (behavior unchanged — the dict update
    still happens before key validation, as before).
    """
    data["GWType"] = "REGIONAL"
    data["GWPolicy"] = get_policy_content(DefaultGWPolicyURL, AccountId, Region)
    data["VpcEndpointDNS"] = ""
    config = misc.parse_line_as_list_of_dict(ApiGWConfiguration, leading_keyname="GWType")
    CONFIG_KEYS = ["GWPolicy", "GWType", "VpcEndpointDNS"]
    if len(config):
        a = config[0]
        # Apply overrides first (original ordering), then validate the keys.
        data.update(a)
        for kw in a.keys():
            if kw not in CONFIG_KEYS:
                raise ValueError("ApiGWConfiguration: Unknown meta key '%s'!" % kw)
            if kw == "GWType":
                valid_endpoint_configurations = ["REGIONAL", "PRIVATE"]
                if a[kw] not in valid_endpoint_configurations:
                    raise ValueError(
                        "Can't set API GW Endpoint to value '%s'! (valid values are %s)"
                        % (a[kw], valid_endpoint_configurations))
            if kw == "GWPolicy" and len(a[kw]):
                # Non-empty override URL replaces the default policy document.
                data["GWPolicy"] = get_policy_content(a[kw], AccountId, Region)
                log.info(Dbg.pprint(data["GWPolicy"]))
    # CloudFormation expects the dotted key form for the endpoint type.
    data["EndpointConfiguration.Type"] = data["GWType"]
def register(config, ignore_double_definition=False, layer="Built-in defaults", create_layer_when_needed=False):
    """Register configuration keys into the named *layer* and rebuild the layer stack.

    Each key of *config* is a metastring line; its leading token ('_') becomes
    the configuration key, the remaining metadata is stored alongside. After
    registration the full layer stack (built-ins, loaded files, optional
    DynamoDB table, dynamic layers) is recomputed and the active parameter
    set is refreshed.

    Raises:
        Exception: unknown *layer* (unless create_layer_when_needed), or a
            duplicate key when ignore_double_definition is False.
    """
    # Module not initialized yet: silently ignore (init order tolerance).
    if _init is None:
        return
    layer_struct = next(
        filter(lambda l: l["source"] == layer, _init["all_configs"]), None)
    if layer_struct is None:
        if not create_layer_when_needed:
            raise Exception(f"Unknown config '{layer}'!")
        # New dynamic layer: created empty and appended at lowest precedence
        # among dynamic layers.
        layer_struct = {"source": layer, "config": {}, "metas": {}}
        _init["dynamic_config"].append(layer_struct)
    layer_config = layer_struct["config"]
    layer_metas = layer_struct["metas"]
    for c in config:
        # The key line itself is a metastring; '_' holds the bare key name.
        p = misc.parse_line_as_list_of_dict(c)
        key = p[0]["_"]
        if not ignore_double_definition and key in layer_config:
            raise Exception("Double definition of key '%s'!" % key)
        layer_config[key] = config[c]
        layer_metas[key] = dict(p[0])
    # Build the config layer stack
    layers = []
    # Add built-in config
    layers.extend(_init["all_configs"])
    # Add file loaded config
    if "loaded_files" in _init:
        layers.extend(_init["loaded_files"])
    if _init["with_kvtable"]:
        # Add DynamoDB based configuration
        layers.extend([{
            "source": "DynamoDB configuration table '%s'" % _init["context"]["ConfigurationTable"],
            "config": _init["configuration_table"].get_dict()
        }])
    layers.extend(_init["dynamic_config"])
    _init["config_layers"] = layers
    # Update config.active_parameter_set: walk layers from highest precedence
    # (reverse order) and keep the first definition found.
    builtin_config = _init["all_configs"][0]["config"]
    for cfg in _get_config_layers(reverse=True):
        c = cfg["config"]
        if "config.active_parameter_set" in c:
            # NOTE(review): 'c == builtin_config' is a value comparison, not
            # identity ('is'); it distinguishes the built-in layer, whose
            # entries are dicts carrying a "DefaultValue" field, from other
            # layers storing plain values — confirm identity was not intended.
            if c == builtin_config and isinstance(c, dict):
                _init["active_parameter_set"] = c[
                    "config.active_parameter_set"]["DefaultValue"]
            else:
                _init["active_parameter_set"] = c[
                    "config.active_parameter_set"]
            break
    _parameterset_sanity_check()
    # Create a lookup efficient key cache
    compile_keys()
def register(config, ignore_double_definition=False):
    """Merge *config* entries into the built-in (first) configuration layer.

    Each key of *config* is a metastring line whose leading '_' token is the
    real configuration key; the parsed metadata dict is stored side by side.

    Raises:
        Exception: duplicate key while ignore_double_definition is False.
    """
    # Tolerate calls made before module initialization.
    if _init is None:
        return
    builtin_layer = _init["all_configs"][0]
    target_config = builtin_layer["config"]
    target_metas = builtin_layer["metas"]
    for line in config:
        parsed = misc.parse_line_as_list_of_dict(line)
        key = parsed[0]["_"]
        if key in target_config and not ignore_double_definition:
            raise Exception("Double definition of key '%s'!" % key)
        target_config[key] = config[line]
        target_metas[key] = dict(parsed[0])
def get_metastring_list(self, key, default=None, TTL=None):
    """Read *key* from state storage and parse it as a metastring list of dicts.

    *default* is used both as the fallback state value and as the fallback
    parse result; *TTL* is forwarded to the state read.
    """
    raw_value = self.get_state(key, default=default, TTL=TTL)
    return misc.parse_line_as_list_of_dict(raw_value, default=default)
def get_prerequisites(self):
    """Load alarm definitions, existing CloudWatch alarms and their metrics.

    Steps:
      1. Read the configured alarm templates into self.alarm_definitions.
      2. List existing CloudWatch alarms belonging to this CloneSquad instance.
      3. Sanity-check that every 'alarmname:' definition matches a real alarm.
      4. Build and execute GetMetricData queries (alarm metrics, user-supplied
         alarms, burstable-instance CPUCreditBalance), throttled by the
         instance scale score to bound CloudWatch polling cost.
      5. Merge fresh results with the cached metrics and persist state.

    Fix: the alarm-definition load failure used '%e' (a float conversion) on
    the exception object, which raised TypeError inside the handler instead
    of logging; changed to '%s'.
    """
    now = self.context["now"]
    client = self.context["cloudwatch.client"]

    # Read all CloudWatch alarm templates into memory
    alarm_definitions = {}
    for i in range(0, Cfg.get_int("cloudwatch.alarms.max_per_instance")):
        key = "cloudwatch.alarm%02d.configuration_url" % (i)
        r = Cfg.get_extended(key)
        if not r["Success"] or r["Value"] == "":
            continue
        d = misc.parse_line_as_list_of_dict(r["Value"])
        url = d[0]["_"]
        meta = d[0]
        index = "%02d" % i
        alarm_defs = {
            "Index": index,
            "Key": key,
            "Url": url,
            "Definition": r,
            "Metadata": meta
        }
        prefix = "alarmname:"
        if url.startswith(prefix):
            # 'alarmname:' references an existing user-managed alarm: no fetch.
            alarm_defs["AlarmName"] = url[len(prefix):]
        else:
            log.log(log.NOTICE, "Read Alarm definition: %s" % r["Value"])
            try:
                resp = misc.get_url(url.format(**self.context))
                if resp is None:
                    raise Exception("URL content = <None>")
                alarm_defs["Content"] = str(resp, "utf-8")
            except Exception as e:
                # Fix: was '%e' which raises TypeError on an Exception argument.
                log.exception("Failed to load Alarm definition '%s' : %s" % (r["Value"], e))
                continue
        alarm_definitions[index] = alarm_defs
    self.alarm_definitions = alarm_definitions

    # Read all existing CloudWatch alarms (paginated via NextToken)
    alarms = []
    response = None
    while (response is None or "NextToken" in response):
        response = client.describe_alarms(
            MaxRecords=Cfg.get_int("cloudwatch.describe_alarms.max_results"),
            NextToken=response["NextToken"] if response is not None else "")
        for alarm in response["MetricAlarms"]:
            alarm_name = alarm["AlarmName"]
            alarm_def = self.get_alarm_configuration_by_name(alarm_name)
            if alarm_def is not None:
                # This alarm belongs to this CloneSquad instance.
                alarms.append(alarm)
    self.alarms = alarms

    # Sanity check: every named alarm definition must match a live alarm.
    for index in self.alarm_definitions.keys():
        alarm_def = self.alarm_definitions[index]
        if "AlarmName" not in alarm_def:
            continue
        alarm = next(
            filter(lambda a: a["AlarmName"] == alarm_def["AlarmName"], self.alarms),
            None)
        if alarm is None:
            log.warning(
                "Alarm definition [%s](%s => %s) doesn't match an existing CloudWatch alarm!"
                % (alarm_def["Definition"]["Key"],
                   alarm_def["Definition"]["Value"],
                   alarm_def["Definition"]["Status"]))

    # Read all metrics associated with alarms.
    # CloudWatch intense polling can be expensive: this algorithm links CW
    # metric polling rate to the scale rate => under intense scale up
    # condition, polling is aggressive. If not, it falls down to one polling
    # every 'cloudwatch.metrics.low_rate_polling_interval' seconds.
    # TODO(@jcjorel): Avoid this kind of direct references to an upper level module!!
    integration_period = Cfg.get_duration_secs(
        "ec2.schedule.horizontalscale.integration_period")
    instance_scale_score = self.ec2.get_integrated_float_state(
        "ec2.schedule.scaleout.instance_scale_score", integration_period)

    self.metric_cache = self.get_metric_cache()

    query = {"IdMapping": {}, "Queries": []}

    # Build query for Alarm metrics
    if Cfg.get("ec2.schedule.desired_instance_count") == "-1":
        # Sort by oldest alarms first in cache
        cached_metric_names = [m["_MetricId"] for m in self.metric_cache]
        valid_alarms = []
        for a in alarms:
            alarm_name = a["AlarmName"]
            alarm_def = self.get_alarm_configuration_by_name(alarm_name)
            if alarm_def is None or alarm_def["AlarmDefinition"]["Url"].startswith("alarmname:"):
                continue
            a["_SamplingTime"] = (self.get_metric_by_id(alarm_name)["_SamplingTime"]
                                  if alarm_name in cached_metric_names
                                  else str(misc.epoch()))
            valid_alarms.append(a)
        sorted_alarms = sorted(valid_alarms,
                               key=lambda a: misc.str2utc(a["_SamplingTime"]))
        # Poll from the oldest to the newest, bounded by instance_scale_score
        # to limit CloudWatch GetMetricData costs.
        time_for_full_metric_refresh = max(
            Cfg.get_duration_secs("cloudwatch.metrics.time_for_full_metric_refresh"), 1)
        app_run_period = Cfg.get_duration_secs("app.run_period")
        minimum_polled_alarms_per_run = Cfg.get_int(
            "cloudwatch.metrics.minimum_polled_alarms_per_run")
        maximum_polled_alarms_per_run = app_run_period / time_for_full_metric_refresh
        maximum_polled_alarms_per_run = min(maximum_polled_alarms_per_run, 1.0)
        weight = min(instance_scale_score, maximum_polled_alarms_per_run)
        max_alarms_for_this_run = max(
            minimum_polled_alarms_per_run,
            int(min(weight, 1.0) * len(sorted_alarms)))
        for alarm in sorted_alarms[:max_alarms_for_this_run]:
            alarm_name = alarm["AlarmName"]
            CloudWatch._format_query(query, alarm_name, alarm)

    # We always poll user supplied alarms
    for alarm in alarms:
        alarm_name = alarm["AlarmName"]
        alarm_def = self.get_alarm_configuration_by_name(alarm_name)
        if alarm_def is None:
            continue  # Unknown alarm name
        if not alarm_def["AlarmDefinition"]["Url"].startswith("alarmname:"):
            continue
        CloudWatch._format_query(query, alarm_name, alarm)

    # Query Metric for Burstable instances (at most once per minute)
    burstable_instances = self.ec2.get_burstable_instances(ScalingState="-error")
    last_collect_date = self.ec2.get_state_date(
        "cloudwatch.metrics.last_burstable_metric_collect_date")
    if last_collect_date is None or (now - last_collect_date) > timedelta(minutes=1):
        for i in burstable_instances:
            instance_id = i["InstanceId"]
            if not self.ec2.is_static_subfleet_instance(instance_id) and \
               self.ec2.get_scaling_state(instance_id) == "excluded":
                continue
            CloudWatch._format_query(
                query, "%s/%s" % ("CPUCreditBalance", instance_id), {
                    "MetricName": "CPUCreditBalance",
                    "Namespace": "AWS/EC2",
                    "Dimensions": [{
                        "Name": "InstanceId",
                        "Value": instance_id
                    }],
                    "Period": 300,
                    "Statistic": "Average"
                })
        self.ec2.set_state(
            "cloudwatch.metrics.last_burstable_metric_collect_date", now,
            TTL=Cfg.get_duration_secs("cloudwatch.default_ttl"))

    # Make request to CloudWatch (batched by 500 queries, paginated)
    query_counter = self.ec2.get_state_int("cloudwatch.metric.query_counter", default=0)
    queries = query["Queries"]
    metric_results = []
    metric_ids = []
    no_metric_ids = []
    while len(queries) > 0:
        q = queries[:500]
        queries = queries[500:]
        results = []
        response = None
        while response is None or "NextToken" in response:
            args = {
                "MetricDataQueries": q,
                "StartTime": now - timedelta(seconds=Cfg.get_duration_secs(
                    "cloudwatch.metrics.data_period")),
                "EndTime": now
            }
            if response is not None:
                args["NextToken"] = response["NextToken"]
            response = client.get_metric_data(**args)
            results.extend(response["MetricDataResults"])
            query_counter += len(q)
        for r in results:
            if r["StatusCode"] != "Complete":
                log.error("Failed to retrieve metrics: %s" % q)
                continue
            metric_id = query["IdMapping"][r["Id"]]
            if len(r["Timestamps"]) == 0:
                if metric_id not in no_metric_ids:
                    no_metric_ids.append(metric_id)
                continue
            if metric_id not in metric_ids:
                metric_ids.append(metric_id)
            r["_MetricId"] = metric_id
            r["_SamplingTime"] = str(now)
            log.debug(r)
            metric_results.append(r)
    if len(no_metric_ids):
        log.info("No metrics returned for alarm '%s'" % no_metric_ids)

    # Merge with existing cache metric: keep stale entries that are still
    # within the retention period and were not refreshed this run.
    metric_cache = self.metric_cache
    self.metric_cache = metric_results
    for m in metric_cache:
        max_retention_period = Cfg.get_duration_secs(
            "cloudwatch.metrics.cache.max_retention_period")
        if m["_MetricId"] in metric_ids or "_SamplingTime" not in m:
            continue
        if (now - misc.str2utc(m["_SamplingTime"])).total_seconds() < max_retention_period:
            self.metric_cache.append(m)
    self.ec2.set_state("cloudwatch.metric.query_counter", query_counter,
                       TTL=Cfg.get_duration_secs("cloudwatch.default_ttl"))
    self.ec2.set_state_json("cloudwatch.metrics.cache", self.metric_cache,
                            TTL=Cfg.get_duration_secs("cloudwatch.default_ttl"))
    self.set_metric("Cloudwatch.GetMetricData", query_counter)

    # Augment Alarm definitions and Instances with associated metrics
    for metric in self.metric_cache:
        metric_id = metric["_MetricId"]
        alarm_data = self.get_alarm_data_by_name(metric_id)
        if alarm_data is not None:
            alarm_data["MetricDetails"] = metric
            continue
        instance = next(
            filter(lambda i: "CPUCreditBalance/%s" % i["InstanceId"] == metric_id,
                   burstable_instances),
            None)
        if instance is not None:
            instance["_Metrics"] = {}
            instance["_Metrics"]["CPUCreditBalance"] = metric
            continue
def get_list_of_dict(key):
    """Return the configuration value of *key* parsed as a list of dicts.

    An unset key yields an empty list.
    """
    value = get(key)
    if value is None:
        return []
    return misc.parse_line_as_list_of_dict(value)
# Command-line options for the static-fleet specification one-liners.
parser.add_argument('--static-fleet-specs',
                    help="Static fleet Instances specifications", type=str, default="")
parser.add_argument('--static-fleet-rds-specs',
                    help="Static fleet RDS specifications", type=str, default="")
args = parser.parse_args()
# Flatten the argparse namespace into a plain dict.
args_dict = {}
for a in args._get_kwargs():
    args_dict[a[0]] = a[1]
# Parse each one-liner specification string into a list of dicts keyed by its
# leading token (InstanceType / Engine), then post-process with convert().
oneliner_args = convert(
    misc.parse_line_as_list_of_dict(args.specs, leading_keyname="InstanceType"))
oneliner_fleet_args = convert(
    misc.parse_line_as_list_of_dict(args.static_fleet_specs, leading_keyname="InstanceType"))
oneliner_fleet_rds_args = convert(
    misc.parse_line_as_list_of_dict(args.static_fleet_rds_specs, leading_keyname="Engine"))
# Deterministic credential material derived from the account id plus the
# deployment parameters file content.
seed = hashlib.md5()
seed.update(bytes(os.environ["AWS_ACCOUNT_ID"], "utf-8"))
# Look for more entropy
with open("%s/deployment-parameters.txt" % os.environ["CLONESQUAD_PARAMETER_DIR"]) as f:
    seed.update(bytes(f.read(), "utf-8"))
# NOTE(review): the "******" literals look like redaction artifacts from the
# original source (presumably format strings such as "...%s"); as written,
# applying '%' to them would raise TypeError — confirm against upstream.
user = "******" % seed.hexdigest()[:8]
seed.update(bytes("password", "utf-8"))
password = "******" % seed.hexdigest()[:16]
def get_metastring_list(self, key, default=None):
    """Read *key* from the KV table and parse it as a metastring list of dicts.

    *default* is returned by the parser when the stored value is unusable.
    """
    stored = self.table.get_kv(key)
    return misc.parse_line_as_list_of_dict(stored, default=default)
# (tail of a convert() helper whose 'def' line lies above this chunk — not visible here)
    # Aurora engines deploy as a DBCluster; every other engine as a DBInstance.
    a["type"] = "DBCluster" if a["Engine"].startswith("aurora") else "DBInstance"
    return args

parser = argparse.ArgumentParser(description="Generate CloudFormation template for Test Environments")
parser.add_argument('--specs', help="Instances specifications", type=str, default="")
parser.add_argument('--subnet_count', help="Number of Persistent Spot instances", type=int, default=3)
parser.add_argument('--subfleet-specs', help="fleet Instances specifications", type=str, default="")
parser.add_argument('--subfleet-rds-specs', help="fleet RDS specifications", type=str, default="")
args = parser.parse_args()
# Flatten the argparse namespace into a plain dict.
args_dict = {}
for a in args._get_kwargs():
    args_dict[a[0]] = a[1]
# Parse the one-liner specification strings (keyed by InstanceType / Engine).
oneliner_args = convert(misc.parse_line_as_list_of_dict(args.specs, leading_keyname="InstanceType"))
oneliner_fleet_args = convert(misc.parse_line_as_list_of_dict(args.subfleet_specs, leading_keyname="InstanceType"))
oneliner_fleet_rds_args = convert(misc.parse_line_as_list_of_dict(args.subfleet_rds_specs, leading_keyname="Engine"))
# Deterministic credential material: account id + deployment parameters file.
seed = hashlib.md5()
seed.update(bytes(os.environ["AWS_ACCOUNT_ID"], "utf-8"))
# Look for more entropy
with open("%s/deployment-parameters.txt" % os.environ["CLONESQUAD_PARAMETER_DIR"]) as f:
    seed.update(bytes(f.read(), "utf-8"))
# NOTE(review): the "******" literals appear to be redaction artifacts
# (presumably format strings like "...%s"); '%' applied to them as written
# would raise TypeError — confirm against the upstream repository.
user = "******" % seed.hexdigest()[:8]
seed.update(bytes("password", "utf-8"))
password = "******" % seed.hexdigest()[:16]
# Template parameters (dict literal continues beyond this chunk).
args_dict["parameters"] = {
    "nb_of_instance_specs": len(oneliner_args),
    "specs": oneliner_args,
    "nb_of_subfleets": len(oneliner_fleet_args),
    "subfleet_specs": oneliner_fleet_args,
def ApiGWVpcEndpointParameters_CreateOrUpdate(
        data, AccountId=None, Region=None, ApiGWId=None, ApiGWConfiguration=None,
        ApiGWEndpointConfiguration=None, DefaultGWVpcEndpointPolicyURL=None):
    """Fill *data* with API Gateway VPC Endpoint deployment parameters.

    ApiGWEndpointConfiguration keywords consumed: VpcId (mandatory),
    VpcEndpointPolicyURL, SubnetIds, PrivateDnsEnabled, TrustedClients.
    Any leftover keyword raises ValueError.

    Fixes:
      * Guard ``endpoint_config[0]`` behind the emptiness check (was an
        unconditional index → IndexError on an empty configuration; now an
        empty config reaches the clearer mandatory-VpcId ValueError).
      * ``del`` VpcEndpointPolicyURL after use — it was never removed from
        ``edp``, so supplying it always tripped the final unknown-keyword
        check.
      * PrivateDnsEnabled: ``bool()`` on a non-empty string is always True
        (including "False"); parse the string value explicitly instead.
    """
    endpoint_config = misc.parse_line_as_list_of_dict(
        ApiGWEndpointConfiguration, with_leading_string=False)
    edp = endpoint_config[0] if len(endpoint_config) else {}
    if len(endpoint_config):
        data.update(edp)
    if "VpcId" not in edp:
        raise ValueError(
            "'VpcId' keyword is mandatory for ApiGWEndpointConfiguration!")
    data["VpcId"] = edp["VpcId"]
    del edp["VpcId"]

    # Policy Document
    data["PolicyDocument"] = get_policy_content(DefaultGWVpcEndpointPolicyURL,
                                                AccountId, Region, api_gw_id=ApiGWId)
    if "VpcEndpointPolicyURL" in edp:
        data["PolicyDocument"] = get_policy_content(
            edp["VpcEndpointPolicyURL"], AccountId, Region, api_gw_id=ApiGWId)
        del edp["VpcEndpointPolicyURL"]  # Fix: consumed keys must be removed.

    # Get SubnetIds list
    if "SubnetIds" in edp:
        subnet_ids = edp["SubnetIds"].split(",")
        del edp["SubnetIds"]
    else:
        # Fetch all Subnets of the VPC
        client = boto3.client("ec2")
        response = client.describe_subnets(Filters=[{
            "Name": "vpc-id",
            "Values": [data["VpcId"]]
        }])
        if not len(response["Subnets"]):
            raise ValueError("Specified VPC '%s' doesn't contain any subnet!"
                             % data["VpcId"])
        subnet_ids = [s["SubnetId"] for s in response["Subnets"]]
    log.info("SubnetIds=%s" % subnet_ids)
    data["SubnetIds"] = subnet_ids

    # PrivateDnsEnabled (defaults to True)
    data["PrivateDnsEnabled"] = True
    if "PrivateDnsEnabled" in edp:
        # Fix: bool("False") is True — interpret the textual value instead.
        # Values come from parse_line_as_list_of_dict and are strings;
        # str() also keeps this safe should a real bool ever be passed.
        data["PrivateDnsEnabled"] = str(edp["PrivateDnsEnabled"]).lower() in ("1", "true", "yes")
        del edp["PrivateDnsEnabled"]

    # Security group for VPC Endpoint (default: allow everything)
    data["SecurityGroupIngressRule"] = [{
        "IpProtocol": "-1",
        "FromPort": "-1",
        "ToPort": "-1",
        "CidrIp": "0.0.0.0/0"
    }]
    if "TrustedClients" in edp:
        data["SecurityGroupIngressRule"] = generate_igress_sg_rule(edp["TrustedClients"])
        del edp["TrustedClients"]

    # All consumed keywords were deleted above; anything left is unknown.
    if len(edp.keys()):
        raise ValueError(
            "Unknown keywords in ApiGWVpcEndpointParameters '%s'!" % edp.keys())