def scale_instances(tasks_per_instance, group_name, total_groups):
    conn = AutoScaleConnection()
    group = conn.get_all_groups(names=[group_name])[0]
    if group.desired_capacity == group.max_size:
        logger.info('Maximum number of instances reached')
        return
    tasks_count = get_sqs_tasks_count()
    if not tasks_count:
        logger.info('No tasks left in queues')
        return
    logger.info('Num of tasks in queues %s', tasks_count)
    tasks_per_instance = float(tasks_per_instance)
    additional_instances_count = int(
        ceil(tasks_count / tasks_per_instance) / total_groups)
    updated_instances_count = \
        group.desired_capacity + additional_instances_count
    # consider max allowed instances
    if updated_instances_count > group.max_size:
        updated_instances_count = group.max_size
    logger.info('Updating group from %s to %s instances',
                group.desired_capacity, updated_instances_count)
    group.set_capacity(updated_instances_count)
    group.desired_capacity = updated_instances_count
    group.update()
    logger.info('Done\n')
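A minimal invocation sketch for the function above, assuming the module-level logger, get_sqs_tasks_count, and default boto credentials it relies on; the group name and sizing values here are hypothetical:

# Hypothetical values: 20 queued tasks per instance, group "worker-group-1",
# and 4 worker groups sharing the queue backlog.
scale_instances(tasks_per_instance=20,
                group_name='worker-group-1',
                total_groups=4)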
def main():
    parser = optparse.OptionParser()
    parser.add_option("-c", "--config", dest="config_file",
                      help="AutoScale config INI", metavar="FILE")
    (options, args) = parser.parse_args()
    logging.info("Using config file [%s]" % options.config_file)
    config = parse_config(options.config_file)
    aws_access = config.get("AWS", 'access')
    aws_secret = config.get("AWS", 'secret')
    logging.debug("Connecting to AWS with access [%s] and secret [%s]"
                  % (aws_access, aws_secret))
    aws_connection = AutoScaleConnection(aws_access, aws_secret)
    print "AutoScalingGroups:"
    print aws_connection.get_all_groups().__dict__
def create_autoscaling_group():
    global img
    conn = AutoScaleConnection(os.environ['AWS_ACCESS_KEY_ID'],
                               os.environ['AWS_SECRET_ACCESS_KEY'])
    print conn.get_all_groups()
    timestamp = time.time()
    value = datetime.datetime.fromtimestamp(timestamp)
    humanreadabledate = value.strftime('%Y-%m-%d_%H.%M.%S')
    config_name = 'live_launch_config' + humanreadabledate
    # The shebang must end with a newline, otherwise the user-data script
    # is a single unrunnable line.
    init_script = ("#!/bin/sh\n"
                   "/home/ec2-user/sds/deployment_scripts/initialize_server.py")
    lc = LaunchConfiguration(name=config_name,
                             image_id=img,
                             key_name='SDSEastKey',
                             security_groups=['sg-a7afb1c2'],
                             user_data=init_script)
    conn.create_launch_configuration(lc)
    ag = AutoScalingGroup(group_name=config_name,
                          load_balancers=['SDSLiveLoadBalancer'],
                          availability_zones=['us-east-1a'],
                          launch_config=lc,
                          min_size=2, max_size=2,
                          connection=conn)
    conn.create_auto_scaling_group(ag)
def test_basic(self):
    # NB: as it says on the tin these are really basic tests that only
    # (lightly) exercise read-only behaviour - and that's only if you
    # have any autoscale groups to introspect. It's useful, however, to
    # catch simple errors
    print '--- running %s tests ---' % self.__class__.__name__
    c = AutoScaleConnection()
    self.assertTrue(repr(c).startswith('AutoScaleConnection'))
    groups = c.get_all_groups()
    for group in groups:
        self.assertIsInstance(group, AutoScalingGroup)
        # get activities
        activities = group.get_activities()
        for activity in activities:
            self.assertIsInstance(activity, Activity)
    # get launch configs
    configs = c.get_all_launch_configurations()
    for config in configs:
        self.assertIsInstance(config, LaunchConfiguration)
    # get policies
    policies = c.get_all_policies()
    for policy in policies:
        self.assertIsInstance(policy, ScalingPolicy)
    # get scheduled actions
    actions = c.get_all_scheduled_actions()
    for action in actions:
        self.assertIsInstance(action, ScheduledUpdateGroupAction)
    # get instances
    instances = c.get_all_autoscaling_instances()
    for instance in instances:
        self.assertIsInstance(instance, Instance)
    # get all scaling process types
    ptypes = c.get_all_scaling_process_types()
    for ptype in ptypes:
        self.assertIsInstance(ptype, ProcessType)
    # get adjustment types
    adjustments = c.get_all_adjustment_types()
    for adjustment in adjustments:
        self.assertIsInstance(adjustment, AdjustmentType)
    # get metrics collection types
    types = c.get_all_metric_collection_types()
    self.assertIsInstance(types, MetricCollectionTypes)
    print '--- tests completed ---'
def get_all_group_instances_and_conn():
    conn = AutoScaleConnection()
    global autoscale_conn
    autoscale_conn = conn
    ec2 = boto.ec2.connect_to_region('us-east-1')
    groups = conn.get_all_groups(
        names=['SCCluster1', 'SCCluster2', 'SCCluster3',
               'SCCluster4'])  # TODO: update this list
    instances = [instance for group in groups for instance in group.instances]
    if not instances:
        sys.exit()
    instance_ids = [instance.instance_id for instance in instances]
    instances = ec2.get_only_instances(instance_ids)
    return instances, conn
def autoscale_group_hosts(group_name):
    import boto.ec2
    from boto.ec2.autoscale import AutoScaleConnection
    ec2 = boto.connect_ec2()
    conn = AutoScaleConnection()
    groups = conn.get_all_groups()
    groups = [group for group in groups if group.name.startswith(group_name)]
    instance_ids = []
    for group in groups:
        print group.name
        instance_ids.extend([i.instance_id for i in group.instances])
    # Resolve every collected id with a single EC2 call.
    instances = ec2.get_only_instances(instance_ids)
    return ([i.private_ip_address for i in instances],
            instances[0].id,
            instances[0].tags.get("aws:autoscaling:groupName"))
def get_all_group_instances_and_conn(groups_names=None):
    if groups_names is None:
        # Evaluate lazily rather than once at import time.
        groups_names = get_autoscale_groups()['groups']
    conn = AutoScaleConnection()
    global autoscale_conn
    autoscale_conn = conn
    ec2 = boto.ec2.connect_to_region('us-east-1')
    selected_group_name = random.choice(groups_names)
    logger.info('Selected autoscale group: %s' % selected_group_name)
    group = conn.get_all_groups(names=[selected_group_name])[0]
    if not group.instances:
        logger.info("No working instances in selected group %s"
                    % selected_group_name)
        upload_logs_to_s3()
        sys.exit()
    instance_ids = [i.instance_id for i in group.instances]
    instances = ec2.get_only_instances(instance_ids)
    return instances, conn
def launch_auto_scaling(stage='development'):
    config = get_provider_dict()
    from boto.ec2.autoscale import AutoScaleConnection, AutoScalingGroup, LaunchConfiguration, Trigger
    conn = AutoScaleConnection(fabric.api.env.conf['AWS_ACCESS_KEY_ID'],
                               fabric.api.env.conf['AWS_SECRET_ACCESS_KEY'],
                               host='%s.autoscaling.amazonaws.com' % config['location'][:-1])
    for name, values in config.get(stage, {}).get('autoscale', {}).items():
        if any(group.name == name for group in conn.get_all_groups()):
            fabric.api.warn(fabric.colors.yellow('Autoscale group %s already exists' % name))
            continue
        lc = LaunchConfiguration(name='%s-launch-config' % name,
                                 image_id=values['image'],
                                 key_name=config['key'])
        conn.create_launch_configuration(lc)
        ag = AutoScalingGroup(group_name=name,
                              load_balancers=values.get('load-balancers'),
                              availability_zones=[config['location']],
                              launch_config=lc,
                              min_size=values['min-size'],
                              max_size=values['max-size'])
        conn.create_auto_scaling_group(ag)
        if 'min-cpu' in values and 'max-cpu' in values:
            tr = Trigger(name='%s-trigger' % name, autoscale_group=ag,
                         measure_name='CPUUtilization', statistic='Average',
                         unit='Percent',
                         dimensions=[('AutoScalingGroupName', ag.name)],
                         period=60,
                         lower_threshold=values['min-cpu'],
                         lower_breach_scale_increment='-1',
                         upper_threshold=values['max-cpu'],
                         upper_breach_scale_increment='2',
                         breach_duration=60)
            conn.create_trigger(tr)
def _is_up_to_date():
    """ Returns True if this instance is up to date. """
    # Retrieve instance information.
    conn = AutoScaleConnection()
    pool = conn.get_all_groups(["LSDA Worker Pool"])[0]
    config = conn.get_all_launch_configurations(
        names=[pool.launch_config_name])[0]
    # Retrieve the AMI for this instance and for others.
    config_ami = config.image_id
    my_ami = urllib.urlopen("http://169.254.169.254/latest/"
                            "meta-data/ami-id").read()
    return config_ami == my_ami
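A sketch of one way such a check might gate a worker's startup, assuming the standard logging and subprocess modules are imported; the shutdown command is illustrative only:

# Hypothetical startup guard: refuse to take work from an instance whose
# AMI no longer matches the pool's launch configuration.
if not _is_up_to_date():
    logging.warning("AMI is stale; halting so autoscaling replaces this node")
    subprocess.call(["sudo", "shutdown", "-h", "now"])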
def autoscale_group_hosts(group_name):
    import boto.ec2
    from boto.ec2.autoscale import AutoScaleConnection
    ec2 = boto.connect_ec2()
    conn = AutoScaleConnection()
    groups = conn.get_all_groups()
    groups = [group for group in groups if group.name.startswith(group_name)]
    instance_ids = []
    for group in groups:
        print "group name:", group.name
        instance_ids.extend([i.instance_id for i in group.instances])
    # Resolve every collected id with a single EC2 call.
    instances = ec2.get_only_instances(instance_ids)
    return (set(i.private_ip_address for i in instances),
            instances[0].id,
            instances[0].tags.get("aws:autoscaling:groupName"))
def find_unused_launch_configs():
    conn = AutoScaleConnection()
    autoscale_groups = conn.get_all_groups(max_records=100)
    launch_configs = conn.get_all_launch_configurations(max_records=100)
    launch_config_names = {lc.name for lc in launch_configs}
    used_launch_config_names = {asg.launch_config_name for asg in autoscale_groups}
    unused_launch_config_names = launch_config_names - used_launch_config_names

    print "Autoscale Groups and Current Launch Configs:"
    print "{:<40}{:<40}".format("ASG", "LC")
    for asg in autoscale_groups:
        print "{:<40}{:<40}".format(asg.name, asg.launch_config_name)

    print "\nUnused Launch Configs: (launch configs without an autoscale group)"
    unused_launch_config_names = sorted(unused_launch_config_names)
    for unused_launch_config in unused_launch_config_names:
        print "\t", unused_launch_config
    return unused_launch_config_names
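The returned names can drive a cleanup pass; a minimal, destructive sketch, assuming you have confirmed nothing else references these configs:

# Delete each launch config the function reported as unused;
# delete_launch_configuration takes the config's name.
conn = AutoScaleConnection()
for name in find_unused_launch_configs():
    conn.delete_launch_configuration(name)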
def delete_autoscaling():
    con = AutoScaleConnection(aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                              aws_access_key_id=AWS_ACCESS_KEY,
                              region=RegionInfo(name=REGION,
                                                endpoint='autoscaling.%s.amazonaws.com' % REGION))
    print "Deleting autoscaling group.."
    group = con.get_all_groups(names=[AUTOSCALING_GROUP_NAME])[0]
    print "shutting down instances"
    group.shutdown_instances()
    time.sleep(LONG_SLEEP_PERIOD)
    print "Deleting autoscaling group itself"
    con.delete_auto_scaling_group(AUTOSCALING_GROUP_NAME, force_delete=True)
    print "Deleting launch configuration"
    con.delete_launch_configuration(AUTOSCALING_GROUP_NAME)
    con.close()
class EbsHelper(object):
    """ Class for helping with ebs """

    def __init__(self, aws, wait_time_secs, app_name=None):
        """ Creates the EbsHelper """
        self.aws = aws
        self.ebs = connect_to_region(aws.region,
                                     aws_access_key_id=aws.access_key,
                                     aws_secret_access_key=aws.secret_key,
                                     security_token=aws.security_token)
        self.autoscale = AutoScaleConnection(aws_access_key_id=aws.access_key,
                                             aws_secret_access_key=aws.secret_key,
                                             security_token=aws.security_token)
        self.s3 = S3Connection(
            aws_access_key_id=aws.access_key,
            aws_secret_access_key=aws.secret_key,
            security_token=aws.security_token,
            host=(lambda r: 's3.amazonaws.com' if r == 'us-east-1'
                  else 's3-' + r + '.amazonaws.com')(aws.region))
        self.app_name = app_name
        self.wait_time_secs = wait_time_secs

    def swap_environment_cnames(self, from_env_name, to_env_name):
        """ Swaps cnames for an environment """
        self.ebs.swap_environment_cnames(source_environment_name=from_env_name,
                                         destination_environment_name=to_env_name)

    def upload_archive(self, filename, key, auto_create_bucket=True):
        """ Uploads an application archive version to s3 """
        try:
            bucket = self.s3.get_bucket(self.aws.bucket)
            if ((self.aws.region != 'us-east-1' and self.aws.region != 'eu-west-1')
                    and bucket.get_location() != self.aws.region) or (
                    self.aws.region == 'us-east-1' and bucket.get_location() != '') or (
                    self.aws.region == 'eu-west-1' and bucket.get_location() != 'eu-west-1'):
                raise Exception("Existing bucket doesn't match region")
        except S3ResponseError:
            bucket = self.s3.create_bucket(self.aws.bucket, location=self.aws.region)

        def __report_upload_progress(sent, total):
            sent = sent or 0
            total = total or 0
            if total:
                percent = int(float(max(1, sent)) / float(total) * 100)
                out("Uploaded " + str(sent) + " bytes of " + str(total)
                    + " (" + str(percent) + "%)")

        # upload the new version
        k = Key(bucket)
        k.key = self.aws.bucket_path + key
        k.set_metadata('time', str(time()))
        k.set_contents_from_filename(filename, cb=__report_upload_progress, num_cb=10)

    def list_available_solution_stacks(self):
        """ Returns a list of available solution stacks """
        stacks = self.ebs.list_available_solution_stacks()
        return stacks['ListAvailableSolutionStacksResponse']['ListAvailableSolutionStacksResult']['SolutionStacks']

    def create_application(self, description=None):
        """ Creates an application and sets the helper's current app_name to the created application """
        out("Creating application " + str(self.app_name))
        self.ebs.create_application(self.app_name, description=description)

    def delete_application(self):
        """ Deletes the application named by the helper's current app_name """
        out("Deleting application " + str(self.app_name))
        self.ebs.delete_application(self.app_name, terminate_env_by_force=True)

    def application_exists(self):
        """ Returns whether or not the given app_name exists """
        response = self.ebs.describe_applications(application_names=[self.app_name])
        return len(response['DescribeApplicationsResponse']['DescribeApplicationsResult']['Applications']) > 0

    def create_environment(self, env_name, version_label=None,
                           solution_stack_name=None, cname_prefix=None,
                           description=None, option_settings=None,
                           tier_name='WebServer', tier_type='Standard',
                           tier_version='1.1'):
        """ Creates a new environment """
        out("Creating environment: " + str(env_name) + ", tier_name:" +
            str(tier_name) + ", tier_type:" + str(tier_type))
        self.ebs.create_environment(self.app_name, env_name,
                                    version_label=version_label,
                                    solution_stack_name=solution_stack_name,
                                    cname_prefix=cname_prefix,
                                    description=description,
                                    option_settings=option_settings,
                                    tier_type=tier_type, tier_name=tier_name,
                                    tier_version=tier_version)

    def environment_exists(self, env_name, include_deleted=False):
        """ Returns whether or not the given environment exists """
        response = self.ebs.describe_environments(application_name=self.app_name,
                                                  environment_names=[env_name],
                                                  include_deleted=include_deleted)
        return len(response['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments']) > 0 \
            and response['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments'][0][
                'Status'] != 'Terminated'

    def environment_resources(self, env_name):
        """ Returns the description for the given environment's resources """
        resp = self.ebs.describe_environment_resources(environment_name=env_name)
        return resp['DescribeEnvironmentResourcesResponse']['DescribeEnvironmentResourcesResult']['EnvironmentResources']

    def get_env_sizing_metrics(self, env_name):
        asg = self.get_asg(env_name)
        if asg:
            return asg.min_size, asg.max_size, asg.desired_capacity
        else:
            return None, None, None

    def get_asg(self, env_name):
        asg_name = self.get_asg_name(env_name)
        asgs = self.autoscale.get_all_groups(names=[asg_name])
        asg = None
        if asgs:
            asg = asgs[0]
        return asg

    def get_asg_name(self, env_name):
        resources = self.environment_resources(env_name)
        name = resources["AutoScalingGroups"][0]["Name"]
        return name

    def set_env_sizing_metrics(self, env_name, min_size, max_size):
        self.update_environment(env_name, option_settings=[
            ("aws:autoscaling:asg", "MinSize", min_size),
            ("aws:autoscaling:asg", "MaxSize", max_size)])

    def environment_data(self, env_name):
        """ Returns the description for the given environment """
        response = self.ebs.describe_environments(application_name=self.app_name,
                                                  environment_names=[env_name],
                                                  include_deleted=False)
        return response['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments'][0]

    def rebuild_environment(self, env_name):
        """ Rebuilds an environment """
        out("Rebuilding " + str(env_name))
        self.ebs.rebuild_environment(environment_name=env_name)

    def get_environments(self):
        """ Returns the environments """
        response = self.ebs.describe_environments(application_name=self.app_name,
                                                  include_deleted=False)
        return response['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments']

    def delete_environment(self, environment_name):
        """ Deletes an environment """
        self.ebs.terminate_environment(environment_name=environment_name,
                                       terminate_resources=True)

    def update_environment(self, environment_name, description=None,
                           option_settings=[], tier_type=None, tier_name=None,
                           tier_version='1.0'):
        """ Updates an application version """
        out("Updating environment: " + str(environment_name))
        messages = self.ebs.validate_configuration_settings(self.app_name,
                                                            option_settings,
                                                            environment_name=environment_name)
        messages = messages['ValidateConfigurationSettingsResponse']['ValidateConfigurationSettingsResult']['Messages']
        ok = True
        for message in messages:
            if message['Severity'] == 'error':
                ok = False
            out("[" + message['Severity'] + "] " + str(environment_name) + " - '"
                + message['Namespace'] + ":" + message['OptionName']
                + "': " + message['Message'])
        self.ebs.update_environment(environment_name=environment_name,
                                    description=description,
                                    option_settings=option_settings,
                                    tier_type=tier_type, tier_name=tier_name,
                                    tier_version=tier_version)

    def get_previous_environment_for_subdomain(self, env_subdomain):
        """ Returns an environment name for the given cname """
        def sanitize_subdomain(subdomain):
            return subdomain.lower()

        env_subdomain = sanitize_subdomain(env_subdomain)

        def match_cname(cname):
            subdomain = sanitize_subdomain(cname.split(".")[0])
            return subdomain == env_subdomain

        def match_candidate(env):
            return env['Status'] != 'Terminated' \
                and env.get('CNAME') \
                and match_cname(env['CNAME'])

        envs = self.get_environments()
        candidates = [env for env in envs if match_candidate(env)]
        match = None
        if candidates:
            match = candidates[0]["EnvironmentName"]
        return match

    def deploy_version(self, environment_name, version_label):
        """ Deploys a version to an environment """
        out("Deploying " + str(version_label) + " to " + str(environment_name))
        self.ebs.update_environment(environment_name=environment_name,
                                    version_label=version_label)

    def get_versions(self):
        """ Returns the versions available """
        response = self.ebs.describe_application_versions(application_name=self.app_name)
        return response['DescribeApplicationVersionsResponse']['DescribeApplicationVersionsResult']['ApplicationVersions']

    def create_application_version(self, version_label, key):
        """ Creates an application version """
        out("Creating application version " + str(version_label) + " for " + str(key))
        self.ebs.create_application_version(self.app_name, version_label,
                                            s3_bucket=self.aws.bucket,
                                            s3_key=self.aws.bucket_path + key)

    def delete_unused_versions(self, versions_to_keep=10):
        """ Deletes unused versions """
        # get versions in use
        environments = self.ebs.describe_environments(application_name=self.app_name,
                                                      include_deleted=False)
        environments = environments['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments']
        versions_in_use = []
        for env in environments:
            versions_in_use.append(env['VersionLabel'])

        # get all versions
        versions = self.ebs.describe_application_versions(application_name=self.app_name)
        versions = versions['DescribeApplicationVersionsResponse']['DescribeApplicationVersionsResult'][
            'ApplicationVersions']
        versions = sorted(versions, reverse=True,
                          cmp=lambda x, y: cmp(x['DateCreated'], y['DateCreated']))

        # delete versions that are not in use, keeping the most recent ones
        for version in versions[versions_to_keep:]:
            if version['VersionLabel'] in versions_in_use:
                out("Not deleting " + version["VersionLabel"] + " because it is in use")
            else:
                out("Deleting unused version: " + version["VersionLabel"])
                self.ebs.delete_application_version(application_name=self.app_name,
                                                    version_label=version['VersionLabel'])
                sleep(2)

    def describe_events(self, environment_name, next_token=None, start_time=None):
        """ Describes events from the given environment """
        events = self.ebs.describe_events(application_name=self.app_name,
                                          environment_name=environment_name,
                                          next_token=next_token,
                                          start_time=start_time)
        return (events['DescribeEventsResponse']['DescribeEventsResult']['Events'],
                events['DescribeEventsResponse']['DescribeEventsResult']['NextToken'])

    def wait_for_environments(self, environment_names, health=None, status=None,
                              version_label=None, include_deleted=True,
                              use_events=True):
        """ Waits for an environment to have the given version_label and to be
        in the green state """
        # turn into a list
        if not isinstance(environment_names, (list, tuple)):
            environment_names = [environment_names]
        environment_names = environment_names[:]

        # print some stuff
        s = "Waiting for environment(s) " + (", ".join(environment_names)) + " to"
        if health is not None:
            s += " have health " + health
        else:
            s += " have any health"
        if version_label is not None:
            s += " and have version " + version_label
        if status is not None:
            s += " and have status " + status
        out(s)

        started = time()
        seen_events = list()

        for env_name in environment_names:
            (events, next_token) = self.describe_events(env_name,
                                                        start_time=utcnow_isoformat())
            for event in events:
                seen_events.append(event)

        delay = 10
        while True:
            # bail if they're all good
            if len(environment_names) == 0:
                break

            # wait
            sleep(delay)

            # get the env
            try:
                environments = self.ebs.describe_environments(
                    application_name=self.app_name,
                    environment_names=environment_names,
                    include_deleted=include_deleted)
            except BotoServerError as e:
                if not e.error_code == 'Throttling':
                    raise
                delay = min(60, int(delay * 1.5))
                out("Throttling: setting delay to " + str(delay) + " seconds")
                continue

            environments = environments['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments']
            if len(environments) <= 0:
                raise Exception("Couldn't find any environments")

            # loop through and wait
            for env in environments[:]:
                env_name = env['EnvironmentName']

                # the message
                msg = "Environment " + env_name + " is " + str(env['Health'])
                if version_label is not None:
                    msg = msg + " and has version " + str(env['VersionLabel'])
                if status is not None:
                    msg = msg + " and has status " + str(env['Status'])

                # what we're waiting for
                good_to_go = True
                if health is not None:
                    good_to_go = good_to_go and str(env['Health']) == health
                if status is not None:
                    good_to_go = good_to_go and str(env['Status']) == status
                if version_label is not None:
                    good_to_go = good_to_go and str(env['VersionLabel']) == version_label

                # allow a certain number of Red samples before failing
                if env['Status'] == 'Ready' and env['Health'] == 'Red':
                    if 'RedCount' not in env:
                        env['RedCount'] = 0
                    env['RedCount'] += 1
                    if env['RedCount'] > MAX_RED_SAMPLES:
                        out('Deploy failed')
                        raise Exception('Ready and red')

                # log it
                if good_to_go:
                    out(msg + " ... done")
                    environment_names.remove(env_name)
                else:
                    out(msg + " ... waiting")

                # log events
                try:
                    (events, next_token) = self.describe_events(
                        env_name, start_time=utcnow_isoformat())
                except BotoServerError as e:
                    if not e.error_code == 'Throttling':
                        raise
                    delay = min(60, int(delay * 1.5))
                    out("Throttling: setting delay to " + str(delay) + " seconds")
                    break
                for event in events:
                    if event not in seen_events:
                        out("[" + event['Severity'] + "] " + event['Message'])
                        seen_events.append(event)

            # check the time
            elapsed = time() - started
            if elapsed > self.wait_time_secs:
                message = "Wait time for environment(s) {environments} to be {health} expired".format(
                    environments=" and ".join(environment_names),
                    health=(health or "Green"))
                raise Exception(message)
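A construction sketch for the helper above; the AwsSettings holder and all of its values are hypothetical stand-ins for whatever settings object the caller already has:

# Hypothetical settings holder; any object exposing these attributes works.
class AwsSettings(object):
    region = 'us-east-1'
    access_key = 'AKIA...'           # placeholder
    secret_key = '...'               # placeholder
    security_token = None
    bucket = 'my-deploy-bucket'      # placeholder
    bucket_path = 'releases/'

helper = EbsHelper(AwsSettings(), wait_time_secs=900, app_name='my-app')
helper.wait_for_environments('my-app-prod', health='Green', status='Ready')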
class WatchData:
    datafile = "/tmp/watchdata.p"
    dry = False
    low_limit = 70
    high_limit = 90
    high_urgent = 95
    stats_period = 120
    history_size = 0

    def __init__(self):
        self.name = ''
        self.instances = 0
        self.new_desired = 0
        self.desired = 0
        self.instances_info = None
        self.previous_instances = 0
        self.action = ""
        self.action_ts = 0
        self.changed_ts = 0
        self.total_load = 0
        self.avg_load = 0
        self.max_load = 0
        self.up_ts = 0
        self.down_ts = 0
        self.max_loaded = None
        self.loads = {}
        self.measures = {}
        self.emergency = False
        self.history = None

    def __getstate__(self):
        """ Don't store these objects """
        d = self.__dict__.copy()
        del d['ec2']
        del d['cw']
        del d['autoscale']
        del d['group']
        del d['instances_info']
        return d

    def connect(self, groupname):
        self.ec2 = boto.connect_ec2()
        self.cw = CloudWatchConnection()
        self.autoscale = AutoScaleConnection()
        self.group = self.autoscale.get_all_groups(names=[groupname])[0]
        self.instances = len(self.group.instances)
        self.desired = self.group.desired_capacity
        self.name = groupname

    def get_instances_info(self):
        ids = [i.instance_id for i in self.group.instances]
        self.instances_info = self.ec2.get_only_instances(instance_ids=ids)

    def get_CPU_loads(self):
        """ Read instances load and store in data """
        for instance in self.group.instances:
            load = self.get_instance_CPU_load(instance.instance_id)
            if load is None:
                continue
            self.total_load += load
            self.loads[instance.instance_id] = load
            if load > self.max_load:
                self.max_load = load
                self.max_loaded = instance.instance_id
        self.avg_load = self.total_load / self.instances

    def get_instance_CPU_load(self, instance):
        end = datetime.datetime.now()
        start = end - datetime.timedelta(seconds=300)
        m = self.cw.get_metric_statistics(self.stats_period, start, end,
                                          "CPUUtilization", "AWS/EC2",
                                          ["Average"], {"InstanceId": instance})
        if len(m) > 0:
            self.measures[instance] = len(m)
            ordered = sorted(m, key=lambda x: x['Timestamp'], reverse=True)
            return ordered[0]['Average']
        return None

    @classmethod
    def from_file(cls):
        try:
            data = pickle.load(open(cls.datafile, "rb"))
        except:
            data = WatchData()
        return data

    def store(self, annotation=False):
        if self.history_size > 0:
            if not self.history:
                self.history = []
            self.history.append([int(time.time()), len(self.group.instances),
                                 int(round(self.total_load))])
            self.history = self.history[-self.history_size:]
        pickle.dump(self, open(self.datafile, "wb"))
        if annotation:
            import utils
            text = json.dumps(self.__getstate__(), skipkeys=True)
            utils.store_annotation("ec2_watch", text)

    def check_too_low(self):
        for instance, load in self.loads.iteritems():
            if load is not None and self.measures[instance] > 1 \
                    and self.instances > 1 and load < self.avg_load * 0.2 and load < 4:
                self.emergency = True
                self.check_avg_low()  # Check if the desired instances can be decreased
                self.action = "EMERGENCY LOW (%s %5.2f%%) " % (instance, load)
                self.kill_instance(instance)
                return True
        return self.emergency

    def check_too_high(self):
        for instance, load in self.loads.iteritems():
            if load is not None and self.measures[instance] > 1 and load > self.high_urgent:
                self.emergency = True
                self.action = "EMERGENCY HIGH (%s %5.2f%%) " % (instance, load)
                if self.instances > 1 and load > self.avg_load * 1.5:
                    self.action += " killing bad instance"
                    self.kill_instance(instance)
                else:
                    self.action += " increasing instances to %d" % (self.instances + 1,)
                    self.set_desired(self.instances + 1)
                return True
        return self.emergency

    def check_avg_high(self):
        threshold = self.high_limit
        if self.instances == 1:
            threshold = threshold * 0.9  # Increase faster if there is just one instance
        if self.avg_load > threshold:
            self.action = "WARN, high load: %d -> %d " % (self.instances,
                                                          self.instances + 1)
            self.set_desired(self.instances + 1)
            return True

    def check_avg_low(self):
        if self.instances <= self.group.min_size:
            return False
        if self.total_load / (self.instances - 1) < self.low_limit:
            self.action = "low load: %d -> %d " % (self.instances,
                                                   self.instances - 1)
            self.set_desired(self.instances - 1)

    def kill_instance(self, id):
        if self.action:
            print self.action
        print "Kill instance", id
        syslog.syslog(syslog.LOG_INFO,
                      "ec2_watch kill_instance: %s instances: %d (%s)"
                      % (id, self.instances, self.action))
        if self.dry:
            return
        self.ec2.terminate_instances(instance_ids=[id])
        self.action_ts = time.time()

    def set_desired(self, desired):
        if self.action:
            print self.action
        print "Setting instances from %d to %d" % (self.instances, desired)
        syslog.syslog(syslog.LOG_INFO,
                      "ec2_watch set_desired: %d -> %d (%s)"
                      % (self.instances, desired, self.action))
        if self.dry:
            return
        if desired >= self.group.min_size:
            self.group.set_capacity(desired)
        self.action_ts = time.time()
        self.new_desired = desired
class MSBManager:
    def __init__(self, aws_access_key, aws_secret_key):
        self.ec2_conn = EC2Connection(aws_access_key, aws_secret_key)
        self.elb_conn = ELBConnection(aws_access_key, aws_secret_key)
        self.auto_scale_conn = AutoScaleConnection(aws_access_key, aws_secret_key)
        self.cloud_watch_conn = CloudWatchConnection(aws_access_key, aws_secret_key)
        self.default_cooldown = 60

    def get_security_group(self, name):
        sgs = [g for g in self.ec2_conn.get_all_security_groups() if g.name == name]
        return sgs[0] if sgs else None

    def create_security_group(self, name, description):
        sgs = [g for g in self.ec2_conn.get_all_security_groups() if g.name == name]
        sg = sgs[0] if sgs else None
        if not sgs:
            sg = self.ec2_conn.create_security_group(name, description)
        try:
            sg.authorize(ip_protocol="-1", from_port=None, to_port=None,
                         cidr_ip="0.0.0.0/0", dry_run=False)
        except EC2ResponseError:
            pass
        return sg

    def remove_security_group(self, name):
        self.ec2_conn.delete_security_group(name=name)

    def create_instance(self, image, instance_type, key_name, zone, security_groups, tags):
        instance = None
        reservations = self.ec2_conn.get_all_instances()
        for reservation in reservations:
            for i in reservation.instances:
                if "Name" in i.tags and i.tags["Name"] == tags["Name"] and i.state == "running":
                    instance = i
                    break
        if not instance:
            reservation = self.ec2_conn.run_instances(
                image,
                instance_type=instance_type,
                key_name=key_name,
                placement=zone,
                security_groups=security_groups,
                monitoring_enabled=True,
            )
            instance = reservation.instances[0]
            while not instance.update() == "running":
                time.sleep(5)
            time.sleep(10)
        self.ec2_conn.create_tags([instance.id], tags)
        return instance

    def request_spot_instance(self, bid, image, instance_type, key_name, zone, security_groups, tags):
        req = self.ec2_conn.request_spot_instances(
            price=bid,
            instance_type=instance_type,
            image_id=image,
            placement=zone,
            key_name=key_name,
            security_groups=security_groups,
        )
        instance_id = None
        while not instance_id:
            job_sir_id = req[0].id
            requests = self.ec2_conn.get_all_spot_instance_requests()
            for sir in requests:
                if sir.id == job_sir_id:
                    instance_id = sir.instance_id
                    break
            print "Job {} not ready".format(job_sir_id)
            time.sleep(60)
        self.ec2_conn.create_tags([instance_id], tags)

    def remove_instance(self, instance_id):
        self.remove_instances([instance_id])

    def remove_instances(self, instance_ids):
        self.ec2_conn.terminate_instances(instance_ids)

    def remove_instance_by_tag_name(self, name):
        reservations = self.ec2_conn.get_all_instances()
        data_center_instance_ids = []
        for reservation in reservations:
            for instance in reservation.instances:
                if "Name" in instance.tags and instance.tags["Name"] == name \
                        and instance.state == "running":
                    data_center_instance_ids.append(instance.id)
        if data_center_instance_ids:
            self.remove_instances(data_center_instance_ids)

    def create_elb(self, name, zone, project_tag_value, security_group_id, instance_ids=None):
        lbs = [l for l in self.elb_conn.get_all_load_balancers() if l.name == name]
        lb = lbs[0] if lbs else None
        if not lb:
            hc = HealthCheck(timeout=50, interval=60,
                             healthy_threshold=2, unhealthy_threshold=8,
                             target="HTTP:80/heartbeat")
            ports = [(80, 80, "http")]
            zones = [zone]
            lb = self.elb_conn.create_load_balancer(name, zones, ports)
            self.elb_conn.apply_security_groups_to_lb(name, [security_group_id])
            lb.configure_health_check(hc)
            if instance_ids:
                lb.register_instances(instance_ids)
        params = {
            "LoadBalancerNames.member.1": lb.name,
            "Tags.member.1.Key": "15619project",
            "Tags.member.1.Value": project_tag_value,
        }
        lb.connection.get_status("AddTags", params, verb="POST")
        return lb

    def remove_elb(self, name):
        self.elb_conn.delete_load_balancer(name)

    def create_launch_configuration(self, name, image, key_name, security_groups, instance_type):
        lcs = [l for l in self.auto_scale_conn.get_all_launch_configurations() if l.name == name]
        lc = lcs[0] if lcs else None
        if not lc:
            lc = LaunchConfiguration(
                name=name,
                image_id=image,
                key_name=key_name,
                security_groups=[security_groups],
                instance_type=instance_type,
            )
            self.auto_scale_conn.create_launch_configuration(lc)
        return lc

    def remove_launch_configuration(self, name):
        self.auto_scale_conn.delete_launch_configuration(name)

    def create_autoscaling_group(self, name, lb_name, zone, tags, lc, instance_ids=None):
        # lc: a LaunchConfiguration prepared via create_launch_configuration()
        as_groups = [a for a in self.auto_scale_conn.get_all_groups() if a.name == name]
        as_group = as_groups[0] if as_groups else None
        if not as_group:
            as_group = AutoScalingGroup(
                group_name=name,
                load_balancers=[lb_name],
                availability_zones=[zone],
                launch_config=lc,
                min_size=4,
                max_size=4,
                health_check_type="ELB",
                health_check_period=120,
                connection=self.auto_scale_conn,
                default_cooldown=self.default_cooldown,
                desired_capacity=4,
                tags=tags,
            )
            self.auto_scale_conn.create_auto_scaling_group(as_group)
            if instance_ids:
                self.auto_scale_conn.attach_instances(name, instance_ids)
            scale_up_policy = ScalingPolicy(name="scale_up",
                                            adjustment_type="ChangeInCapacity",
                                            as_name=name,
                                            scaling_adjustment=1,
                                            cooldown=self.default_cooldown)
            scale_down_policy = ScalingPolicy(name="scale_down",
                                              adjustment_type="ChangeInCapacity",
                                              as_name=name,
                                              scaling_adjustment=-1,
                                              cooldown=self.default_cooldown)
            self.auto_scale_conn.create_scaling_policy(scale_up_policy)
            self.auto_scale_conn.create_scaling_policy(scale_down_policy)
            # re-fetch the policies so their ARNs are populated
            scale_up_policy = self.auto_scale_conn.get_all_policies(
                as_group=name, policy_names=["scale_up"])[0]
            scale_down_policy = self.auto_scale_conn.get_all_policies(
                as_group=name, policy_names=["scale_down"])[0]
            alarm_dimensions = {"AutoScalingGroupName": name}
            scale_up_alarm = MetricAlarm(name="scale_up_on_cpu",
                                         namespace="AWS/EC2",
                                         metric="CPUUtilization",
                                         statistic="Average",
                                         comparison=">",
                                         threshold=85,
                                         period=60,
                                         evaluation_periods=1,
                                         alarm_actions=[scale_up_policy.policy_arn],
                                         dimensions=alarm_dimensions)
            self.cloud_watch_conn.create_alarm(scale_up_alarm)
            scale_down_alarm = MetricAlarm(name="scale_down_on_cpu",
                                           namespace="AWS/EC2",
                                           metric="CPUUtilization",
                                           statistic="Average",
                                           comparison="<",
                                           threshold=60,
                                           period=60,
                                           evaluation_periods=1,
                                           alarm_actions=[scale_down_policy.policy_arn],
                                           dimensions=alarm_dimensions)
            self.cloud_watch_conn.create_alarm(scale_down_alarm)
        return as_group

    def update_autoscaling_group_max_size(self, as_group, max_size):
        setattr(as_group, "max_size", max_size)
        as_group.update()

    def update_autoscaling_group_min_size(self, as_group, min_size):
        setattr(as_group, "min_size", min_size)
        as_group.update()

    def remove_autoscaling_group(self, name):
        self.auto_scale_conn.delete_auto_scaling_group(name)
class BotoScaleInterface(ScaleInterface):
    conn = None
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        # boto.set_stream_logger('foo')
        path = '/services/AutoScaling'
        port = 8773
        if clc_host[len(clc_host) - 13:] == 'amazonaws.com':
            clc_host = clc_host.replace('ec2', 'autoscaling', 1)
            path = '/'
            port = 443
        reg = RegionInfo(name='eucalyptus', endpoint=clc_host)
        self.conn = AutoScaleConnection(access_id, secret_key, region=reg,
                                        port=port, path=path, is_secure=True,
                                        security_token=token, debug=0)
        self.conn.https_validate_certificates = False
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        f = open(name, 'w')
        json.dump(obj, f, cls=BotoJsonScaleEncoder, indent=2)
        f.close()

    ##
    # autoscaling methods
    ##
    def create_auto_scaling_group(self, as_group):
        return self.conn.create_auto_scaling_group(as_group)

    def delete_auto_scaling_group(self, name, force_delete=False):
        return self.conn.delete_auto_scaling_group(name, force_delete)

    def get_all_groups(self, names=None, max_records=None, next_token=None):
        obj = self.conn.get_all_groups(names, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Groups.json")
        return obj

    def get_all_autoscaling_instances(self, instance_ids=None, max_records=None, next_token=None):
        obj = self.conn.get_all_autoscaling_instances(instance_ids, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Instances.json")
        return obj

    def set_desired_capacity(self, group_name, desired_capacity, honor_cooldown=False):
        # notice, honor_cooldown not supported.
        group = self.conn.get_all_groups([group_name])[0]
        return group.set_capacity(desired_capacity)

    def set_instance_health(self, instance_id, health_status, should_respect_grace_period=True):
        return self.conn.set_instance_health(instance_id, health_status,
                                             should_respect_grace_period)

    def terminate_instance(self, instance_id, decrement_capacity=True):
        return self.conn.terminate_instance(instance_id, decrement_capacity)

    def update_autoscaling_group(self, as_group):
        as_group.connection = self.conn
        return as_group.update()

    def create_launch_configuration(self, launch_config):
        return self.conn.create_launch_configuration(launch_config)

    def delete_launch_configuration(self, launch_config_name):
        return self.conn.delete_launch_configuration(launch_config_name)

    def get_all_launch_configurations(self, config_names, max_records, next_token):
        obj = self.conn.get_all_launch_configurations(names=config_names,
                                                      max_records=max_records,
                                                      next_token=next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_LaunchConfigs.json")
        return obj
class aws:
    def __init__(self, PREFIX='tfound-', ENV='dev', AMI='', TYPE='', SIZE='',
                 MIN=2, MAX=5, DOMAIN='tfound', SSHKEY='myprivatekey',
                 AWSKEY='', AWSSECRET='',
                 AVAIL_ZONES=["us-east-1a", "us-east-1b", "us-east-1c", "us-east-1d"]):
        '''
        Shows examples.
        Create load balancer group 'tfound-dev-web-lb' for web servers, in the dev group for tfound:
            python control-lb-and-groups.py --createlb --env dev --aws SC --type web
        Add an instance to the load balancer group:
            python control-lb-and-groups.py --addtolb=true --env dev --aws SC --type web --instance=i-999999
        Create a launch config using ami ami-fa6b8393 (default), a medium-sized instance,
        and Autoscale Group 'tfound-dev-web-group' with a min of 2 instances, max 5,
        with a health check on port 80:
            python control-lb-and-groups.py --createlc --ami ami-fa6b8393 --size c1.medium --env dev --aws SC --type web --createag --min 2 --max 5
        Triggers/health checks are hard coded to spawn new instances when total
        cpu reaches the trigger threshold or the health check fails.
        '''
        self.PREFIX = PREFIX + DOMAIN + '-' + ENV + '-' + TYPE
        self.ENV = ENV
        self.AMI = AMI
        self.TYPE = TYPE
        self.DOMAIN = DOMAIN
        self.SIZE = SIZE
        self.MIN = MIN
        self.MAX = MAX
        self.SSHKEY = SSHKEY
        self.AWSKEY = AWSKEY
        self.AWSSECRET = AWSSECRET
        self.AVAIL_ZONES = AVAIL_ZONES
        self.LBNAME = self.PREFIX + '-lb'
        self.AGNAME = self.PREFIX + '-group'
        self.TRNAME = self.PREFIX + '-trigger'
        self.LCNAME = self.PREFIX + '-launch_config'
        self.asconn = AutoScaleConnection(self.AWSKEY, self.AWSSECRET)
        self.elbconn = ELBConnection(aws_access_key_id=AWSKEY,
                                     aws_secret_access_key=AWSSECRET)
        self.lc = self._buildLaunchConfig()
        self.ag = self._buildAutoscaleGroup()

    def _buildLaunchConfig(self):
        return LaunchConfiguration(name=self.LCNAME,
                                   image_id=self.AMI,
                                   key_name=self.SSHKEY,
                                   security_groups=[self.ENV + '.' + self.TYPE],
                                   user_data='LAUNCHTAGS="' + self.ENV + ' '
                                             + self.TYPE + ' ' + self.DOMAIN + '";',
                                   instance_type=self.SIZE)

    def _buildAutoscaleGroup(self):
        return AutoScalingGroup(group_name=self.AGNAME,
                                load_balancers=[self.LBNAME],
                                availability_zones=self.AVAIL_ZONES,
                                launch_config=self.lc,
                                min_size=self.MIN,
                                max_size=self.MAX)

    def getGroups(self):
        '''get existing lb groups'''
        return self.asconn.get_all_groups()

    def getActivities(self, AUTOSCALE_GROUP=None):
        return self.asconn.get_all_activities(AUTOSCALE_GROUP)

    def createLaunchConfig(self):
        '''create Launch Configuration to define initial startup params'''
        return self.asconn.create_launch_configuration(self.lc)

    def createAutoscaleGroup(self):
        '''Associates the launch configuration with our new autoscale group.
        returns autoscale object
        '''
        return self.asconn.create_auto_scaling_group(self.ag)

    def createTrigger(self, AUTOSCALE_GROUP=None):
        '''Create a trigger on a group; pass in a group object.
        This creates a trigger that scales up toward MAX instances if average
        cpu utilization goes over the upper threshold (30), and scales down
        toward MIN instances if it drops under the lower threshold (10).
        '''
        tr = Trigger(name=self.TRNAME,
                     autoscale_group=AUTOSCALE_GROUP,
                     measure_name='CPUUtilization',
                     statistic='Average',
                     unit='Percent',
                     dimensions=[('AutoScalingGroupName', AUTOSCALE_GROUP.name),
                                 ('Namespace', 'AWS/EC2')],
                     period=120,
                     lower_threshold=10,
                     lower_breach_scale_increment='-1',
                     upper_threshold=30,
                     upper_breach_scale_increment='1',
                     breach_duration=360)
        return self.asconn.create_trigger(tr)

    def createHealthCheck(self):
        return HealthCheck('instance_health', interval=20,
                           target='TCP:8080', timeout=5)

    def createLoadbalancer(self):
        hc = self.createHealthCheck()
        lb = self.elbconn.create_load_balancer(self.LBNAME, self.AVAIL_ZONES,
                                               [(8080, 8080, 'tcp')])
        lb.configure_health_check(hc)
        return lb.dns_name

    def addToLoadbalancer(self, INSTANCE=None):
        if INSTANCE is None:
            sys.stderr.write("Please provide an instance id to add.\n")
            raise SystemExit(1)
        self.elbconn.register_instances(self.LBNAME, INSTANCE)
        print "Added instance %s to %s\n" % (INSTANCE, self.LBNAME)

    def getLoadbalancers(self):
        return self.elbconn.get_all_load_balancers()

    def startInstances(self, TYPE='', NUM='', SIZE=''):
        return
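A hypothetical driver for the class above, creating the launch config, group, and trigger in order; credentials and the AMI are placeholders:

cluster = aws(ENV='dev', TYPE='web', AMI='ami-fa6b8393', SIZE='c1.medium',
              AWSKEY='AKIA...', AWSSECRET='...')   # placeholder credentials
cluster.createLaunchConfig()
cluster.createAutoscaleGroup()
cluster.createTrigger(AUTOSCALE_GROUP=cluster.ag)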
conn_cw = boto.ec2.cloudwatch.connect_to_region(
    regionName,
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_KEY)

######################### end configuration ################################

# balancers = elb.get_all_load_balancers()
# print balancers[0]

# retrieve the instances in the autoscale group
group = conn_as.get_all_groups(names=[autoscaling_group['name']])[0]
instanceids = [i.instance_id for i in group.instances]
instances = conn_ec2.get_only_instances(instanceids)
print instances

# shut down all the instances in the autoscale group
ag = conn_as.get_all_groups()[0]
print "shutdown the instances in the autoscaling group"
ag.shutdown_instances()
sleep(20)

# delete the autoscale group
print "delete the autoscaling group"
ag.delete()
class WatchData:
    datafile = "/var/tmp/watchdata.p"
    dry = False
    low_limit = 72
    high_limit = 90
    high_urgent = 95
    stats_period = 60
    history_size = 0

    def __init__(self):
        self.name = ''
        self.instances = 0
        self.new_desired = 0
        self.desired = 0
        self.instances_info = None
        self.previous_instances = 0
        self.action = ""
        self.action_ts = 0
        self.changed_ts = 0
        self.total_load = 0
        self.avg_load = 0
        self.max_load = 0
        self.up_ts = 0
        self.down_ts = 0
        self.max_loaded = None
        self.loads = {}
        self.measures = {}
        self.emergency = False
        self.history = None
        self.trend = 0
        self.exponential_average = 0
        self.ts = 0

    def __getstate__(self):
        """ Don't store these objects """
        d = self.__dict__.copy()
        del d['ec2']
        del d['cw']
        del d['autoscale']
        del d['group']
        del d['instances_info']
        return d

    def connect(self, groupname):
        self.ec2 = boto.connect_ec2()
        self.cw = CloudWatchConnection()
        self.autoscale = AutoScaleConnection()
        self.group = self.autoscale.get_all_groups(names=[groupname])[0]
        self.instances = len(self.group.instances)
        self.desired = self.group.desired_capacity
        self.name = groupname
        self.ts = int(time.time())

    def get_instances_info(self):
        ids = [i.instance_id for i in self.group.instances]
        self.instances_info = self.ec2.get_only_instances(instance_ids=ids)

    def get_CPU_loads(self):
        """ Read instances load and store in data """
        measures = 0
        for instance in self.group.instances:
            load = self.get_instance_CPU_load(instance.instance_id)
            if load is None:
                continue
            measures += 1
            self.total_load += load
            self.loads[instance.instance_id] = load
            if load > self.max_load:
                self.max_load = load
                self.max_loaded = instance.instance_id
        if measures > 0:
            self.avg_load = self.total_load / measures

    def get_instance_CPU_load(self, instance):
        end = datetime.datetime.now()
        start = end - datetime.timedelta(seconds=int(self.stats_period * 3))
        m = self.cw.get_metric_statistics(self.stats_period, start, end,
                                          "CPUUtilization", "AWS/EC2",
                                          ["Average"], {"InstanceId": instance})
        if len(m) > 0:
            self.measures[instance] = len(m)
            ordered = sorted(m, key=lambda x: x['Timestamp'])
            averages = [x['Average'] for x in ordered]
            # weighted average of the two most recent samples
            average = reduce(lambda x, y: 0.4 * x + 0.6 * y, averages[-2:])
            return average
        return None

    @classmethod
    def from_file(cls):
        try:
            data = pickle.load(open(cls.datafile, "rb"))
        except:
            data = WatchData()
        return data

    def store(self, annotation=False):
        if self.history_size > 0:
            if not self.history:
                self.history = []
            self.history.append([int(time.time()), len(self.group.instances),
                                 int(round(self.total_load)),
                                 int(round(self.avg_load))])
            self.history = self.history[-self.history_size:]
        pickle.dump(self, open(self.datafile, "wb"))
        if annotation:
            import utils
            text = json.dumps(self.__getstate__(), skipkeys=True)
            utils.store_annotation("ec2_watch", text)

    def check_too_low(self):
        for instance, load in self.loads.iteritems():
            if load is not None and self.measures[instance] > 1 \
                    and self.instances > 1 and load < self.avg_load * 0.2 and load < 4:
                self.emergency = True
                self.check_avg_low()  # Check if the desired instances can be decreased
                self.action = "EMERGENCY LOW (%s %5.2f%%) " % (instance, load)
                self.kill_instance(instance)
                return True
        return self.emergency

    def check_too_high(self):
        for instance, load in self.loads.iteritems():
            if load is not None and self.measures[instance] > 1 \
                    and load > self.high_urgent:
                self.emergency = True
                self.action = "EMERGENCY HIGH (%s %5.2f%%) " % (instance, load)
                if self.instances > 1 and load > self.avg_load * 1.5:
                    self.action += " killing bad instance"
                    self.kill_instance(instance)
                else:
                    self.action += " increasing instances to %d" % (self.instances + 1,)
                    self.set_desired(self.instances + 1)
                return True
        return self.emergency

    def check_avg_high(self):
        threshold = self.high_limit
        if self.instances == 1:
            threshold = threshold * 0.9  # Increase faster if there is just one instance
        if self.avg_load > threshold:
            self.action = "WARN, high load: %d -> %d " % (self.instances,
                                                          self.instances + 1)
            self.set_desired(self.instances + 1)
            return True

    def check_avg_low(self):
        if self.instances <= self.group.min_size:
            return False
        if self.total_load / (self.instances - 1) < self.low_limit:
            self.action = "low load: %d -> %d " % (self.instances,
                                                   self.instances - 1)
            self.set_desired(self.instances - 1)

    def kill_instance(self, id):
        if self.action:
            print(self.action)
        print("Kill instance", id)
        syslog.syslog(syslog.LOG_INFO,
                      "ec2_watch kill_instance: %s instances: %d (%s)"
                      % (id, self.instances, self.action))
        if self.dry:
            return
        self.ec2.terminate_instances(instance_ids=[id])
        self.action_ts = time.time()

    def set_desired(self, desired):
        if self.action:
            print(self.action)
        print("Setting instances from %d to %d" % (self.instances, desired))
        syslog.syslog(syslog.LOG_INFO,
                      "ec2_watch set_desired: %d -> %d (%s)"
                      % (self.instances, desired, self.action))
        if self.dry:
            return
        if desired >= self.group.min_size:
            self.group.set_capacity(desired)
        self.action_ts = time.time()
        self.new_desired = desired
image = conn.get_image(ami_id)
print "Image is now " + image.state

# Connect to the autoscaling service
print "\nStep 2 : Creating scaling configuration"
aws_region_as = RegionInfo(name=region_name,
                           endpoint=region_autoscale_endpoint)
conn_as = AutoScaleConnection(aws_access_key_id, aws_secret_access_key,
                              region=aws_region_as)

# Create the autoscaling configuration
lc = LaunchConfiguration(name=build_name, image_id=ami_id,
                         key_name=key_name, security_groups=security_group,
                         instance_type=instance_type)
conn_as.create_launch_configuration(lc)
print "Autoscaling configuration ready : " + build_name

# Update the autoscaling group
print "\nStep 3 : Updating scaling group"
print "Updating scaling group with new conf & terminating all the existing instances in the scaling group"
as_group = conn_as.get_all_groups(names=[scaling_group_name])[0]
setattr(as_group, 'launch_config_name', build_name)
setattr(as_group, 'desired_capacity', 0)
setattr(as_group, 'min_size', 0)
as_group.update()

# Launch the new systems
print "Waiting for 60 secs before launching new systems"
time.sleep(60)
setattr(as_group, 'desired_capacity', as_desired_capacity)
setattr(as_group, 'min_size', as_min_size)
as_group.update()
print "Systems are being launched, update process complete.\n\nLife is that easy, have fun scaling :) :)"
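The second update() only requests capacity; the replacement instances come up asynchronously. A sketch of a polling loop that could follow, reusing conn_as, scaling_group_name, and as_desired_capacity from the script above:

# Poll until the group reports the requested number of instances.
while True:
    group = conn_as.get_all_groups(names=[scaling_group_name])[0]
    if len(group.instances) >= as_desired_capacity:
        print "Scaling group is back at capacity"
        break
    print "Waiting: %d of %d instances" % (len(group.instances),
                                           as_desired_capacity)
    time.sleep(15)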
class BotoScaleInterface(ScaleInterface):
    conn = None
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        # boto.set_stream_logger('foo')
        path = '/services/AutoScaling'
        port = 8773
        if clc_host[len(clc_host) - 13:] == 'amazonaws.com':
            clc_host = clc_host.replace('ec2', 'autoscaling', 1)
            path = '/'
            port = 443
        reg = RegionInfo(name='eucalyptus', endpoint=clc_host)
        if boto.__version__ < '2.6':
            self.conn = AutoScaleConnection(access_id, secret_key, region=reg,
                                            port=port, path=path, is_secure=True,
                                            security_token=token, debug=0)
        else:
            self.conn = AutoScaleConnection(access_id, secret_key, region=reg,
                                            port=port, path=path,
                                            validate_certs=False, is_secure=True,
                                            security_token=token, debug=0)
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        f = open(name, 'w')
        json.dump(obj, f, cls=BotoJsonScaleEncoder, indent=2)
        f.close()

    ##
    # autoscaling methods
    ##
    def create_auto_scaling_group(self, as_group):
        return self.conn.create_auto_scaling_group(as_group)

    def delete_auto_scaling_group(self, name, force_delete=False):
        return self.conn.delete_auto_scaling_group(name, force_delete)

    def get_all_groups(self, names=None, max_records=None, next_token=None):
        obj = self.conn.get_all_groups(names, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Groups.json")
        return obj

    def get_all_autoscaling_instances(self, instance_ids=None, max_records=None, next_token=None):
        obj = self.conn.get_all_autoscaling_instances(instance_ids, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Instances.json")
        return obj

    def set_desired_capacity(self, group_name, desired_capacity, honor_cooldown=False):
        return self.conn.set_desired_capacity(group_name, desired_capacity,
                                              honor_cooldown)

    def set_instance_health(self, instance_id, health_status, should_respect_grace_period=True):
        return self.conn.set_instance_health(instance_id, health_status,
                                             should_respect_grace_period)

    def terminate_instance(self, instance_id, decrement_capacity=True):
        return self.conn.terminate_instance(instance_id, decrement_capacity)

    def update_autoscaling_group(self, as_group):
        as_group.connection = self.conn
        return as_group.update()

    def create_launch_configuration(self, launch_config):
        return self.conn.create_launch_configuration(launch_config)

    def delete_launch_configuration(self, launch_config_name):
        return self.conn.delete_launch_configuration(launch_config_name)

    def get_all_launch_configurations(self, config_names, max_records, next_token):
        obj = self.conn.get_all_launch_configurations(names=config_names,
                                                      max_records=max_records,
                                                      next_token=next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_LaunchConfigs.json")
        return obj
class BotoScaleInterface(ScaleInterface):
    conn = None
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        self.access_id = access_id
        self.secret_key = secret_key
        self.token = token
        self.set_endpoint(clc_host)

    def set_endpoint(self, endpoint):
        # boto.set_stream_logger('scale')
        path = '/services/AutoScaling'
        reg = RegionInfo(name='eucalyptus', endpoint=endpoint)
        port = 8773
        if endpoint[len(endpoint) - 13:] == 'amazonaws.com':
            endpoint = endpoint.replace('ec2', 'autoscaling', 1)
            path = '/'
            reg = RegionInfo(endpoint=endpoint)
            port = 443
        self.conn = AutoScaleConnection(self.access_id, self.secret_key,
                                        region=reg, port=port, path=path,
                                        is_secure=True,
                                        security_token=self.token, debug=0)
        self.conn.APIVersion = '2011-01-01'
        if not endpoint[len(endpoint) - 13:] == 'amazonaws.com':
            self.conn.auth_region_name = 'Eucalyptus'
        self.conn.https_validate_certificates = False
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        f = open(name, 'w')
        json.dump(obj, f, cls=BotoJsonScaleEncoder, indent=2)
        f.close()

    ##
    # autoscaling methods
    ##
    def create_auto_scaling_group(self, as_group):
        return self.conn.create_auto_scaling_group(as_group)

    def delete_auto_scaling_group(self, name, force_delete=False):
        return self.conn.delete_auto_scaling_group(name, force_delete)

    def get_all_groups(self, names=None, max_records=None, next_token=None):
        obj = self.conn.get_all_groups(names, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Groups.json")
        return obj

    def get_all_autoscaling_instances(self, instance_ids=None, max_records=None, next_token=None):
        obj = self.conn.get_all_autoscaling_instances(instance_ids, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Instances.json")
        return obj

    def set_desired_capacity(self, group_name, desired_capacity, honor_cooldown=False):
        # notice, honor_cooldown not supported.
        group = self.conn.get_all_groups([group_name])[0]
        return group.set_capacity(desired_capacity)

    def set_instance_health(self, instance_id, health_status, should_respect_grace_period=True):
        return self.conn.set_instance_health(instance_id, health_status,
                                             should_respect_grace_period)

    def terminate_instance(self, instance_id, decrement_capacity=True):
        return self.conn.terminate_instance(instance_id, decrement_capacity)

    def update_autoscaling_group(self, as_group):
        as_group.connection = self.conn
        return as_group.update()

    def create_launch_configuration(self, launch_config):
        return self.conn.create_launch_configuration(launch_config)

    def delete_launch_configuration(self, launch_config_name):
        return self.conn.delete_launch_configuration(launch_config_name)

    def get_all_launch_configurations(self, config_names=None, max_records=None, next_token=None):
        obj = self.conn.get_all_launch_configurations(names=config_names,
                                                      max_records=max_records,
                                                      next_token=next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_LaunchConfigs.json")
        return obj

    # policy related
    def delete_policy(self, policy_name, autoscale_group=None):
        return self.conn.delete_policy(policy_name, autoscale_group)

    def get_all_policies(self, as_group=None, policy_names=None, max_records=None, next_token=None):
        obj = self.conn.get_all_policies(as_group, policy_names, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Policies.json")
        return obj

    def execute_policy(self, policy_name, as_group=None, honor_cooldown=None):
        return self.conn.execute_policy(policy_name, as_group, honor_cooldown)

    def create_scaling_policy(self, scaling_policy):
        return self.conn.create_scaling_policy(scaling_policy)

    def get_all_adjustment_types(self):
        return self.conn.get_all_adjustment_types()

    # tag related
    def delete_tags(self, tags):
        return self.conn.delete_tags(tags)

    def get_all_tags(self, filters=None, max_records=None, next_token=None):
        obj = self.conn.get_all_tags(filters, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Tags.json")
        return obj

    def create_or_update_tags(self, tags):
        return self.conn.create_or_update_tags(tags)
def main(): parser = argparse.ArgumentParser( description = "triggers a full LSDA rollout") parser.add_argument("--inspect", action = "store_true", help = "pause before baking AMI", default = False) parser.add_argument("--clean", action = "store_true", help = "reset from clean Ubuntu 12.04 image", default = False) parser.add_argument("--no-restart", action = "store_true", dest = "no_restart", help = "don't restart all nodes in ASG", default = False) options = parser.parse_args() logging.info("Starting rollout.") conn_ec2 = boto.ec2.connect_to_region("us-east-1") conn_ec2_as = AutoScaleConnection() if not options.clean: logging.info("Searching for existing images...") group = conn_ec2_as.get_all_groups(['LSDA Worker Pool'])[0] launch_config = conn_ec2_as.get_all_launch_configurations( names=[group.launch_config_name])[0] ami_id = launch_config.image_id logging.info("Using existing image {0}".format(ami_id)) else: ami_id = 'ami-59a4a230' # Clean Ubuntu 12.04. logging.info("Using base image {0}".format(ami_id)) reservation = conn_ec2.run_instances( image_id = ami_id, key_name = 'jeremy-aws-key', instance_type = 't1.micro', security_groups = ['Worker Nodes'], ) try: instance = reservation.instances[0] logging.info("Waiting for instance {} to start...".format(instance.id)) instance.update() while instance.ip_address is None: logging.info("Not ready. Retrying in 10 seconds...") time.sleep(10) instance.update() while True: result = subprocess.call(["ssh", "-o", "UserKnownHostsFile=/dev/null", "-o", "StrictHostKeyChecking=no", "ubuntu@{}".format(instance.ip_address), "uname -r"]) if result != 0: logging.info("Not ready for SSH. Retrying in 10 seconds...") time.sleep(10) else: break logging.info("Instance has started; running setup script.") logging.info("(IP address is {})".format(instance.ip_address)) subprocess.check_call(["ssh", "-o", "UserKnownHostsFile=/dev/null", "-o", "StrictHostKeyChecking=no", "ubuntu@{}".format(instance.ip_address), "sudo stop lsda; sleep 20; sudo rm worker.sh;" "wget https://raw.github.com/fatlotus/lsda-infrastructure/" "master/servers/worker.sh; sudo bash worker.sh"]) if options.inspect: logging.info("Connect to ubuntu@{} to inspect the image." .format(instance.ip_address)) logging.info("When you're done, press CTRL-C.") try: while True: time.sleep(3600) except KeyboardInterrupt: pass logging.info("Creating AMI from existing image.") new_image = instance.create_image( name = ('Latest-{:%Y-%m-%d--%H-%M-%S}'. format(datetime.datetime.now())), description = "(automatically generated)" ) time.sleep(10) image_object = conn_ec2.get_image(new_image) while image_object.state == "pending": logging.info("State is still pending. Retrying in 10 seconds.") time.sleep(10) image_object.update() finally: logging.warn("Stopping all nodes.") for node in reservation.instances: node.terminate() logging.info("Creating new LaunchConfiguration.") mapping = BlockDeviceMapping() mapping["/dev/sdb"] = BlockDeviceType(ephemeral_name = "ephemeral0") mapping["/dev/sdc"] = BlockDeviceType(ephemeral_name = "ephemeral1") new_launch_config = LaunchConfiguration( conn_ec2_as, name = ('Latest-{:%Y-%m-%d--%H-%M-%S}'.
format(datetime.datetime.now())), image_id = new_image, security_groups = ['sg-f9a08492'], instance_type = 'c3.large', block_device_mappings = [mapping], instance_profile_name = ("arn:aws:iam::470084502640:instance-profile" "/dal-access"), spot_price = 0.02, ) conn_ec2_as.create_launch_configuration(new_launch_config) logging.info("Setting launch configuration in existing ASG.") group = conn_ec2_as.get_all_groups(['LSDA Worker Pool'])[0] # re-fetch here so --clean runs also have a group object group.launch_config_name = new_launch_config.name group.update() logging.info("Cleaning up old launch configurations.") for config in conn_ec2_as.get_all_launch_configurations(): if config.image_id != new_launch_config.image_id: conn_ec2_as.delete_launch_configuration(config.name) logging.info("Cleaning up old images.") for image in conn_ec2.get_all_images(filters={"name": ["Latest-*"]}): # images are named 'Latest-<timestamp>' above if image.id != new_image: conn_ec2.deregister_image(image.id, True) logging.info("Rollout complete. New image is {}.".format(new_image)) if not options.no_restart: logging.info("Triggering reload of all nodes in ASG.") for instance in group.instances: for reservation in conn_ec2.get_all_instances([instance.instance_id]): reservation.stop_all()
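The restart step above stops every node at once. A gentler alternative, sketched here against the same boto connections, lets the ASG replace instances one at a time by terminating them without decrementing capacity; the pause length is a guess, not a tested value.

import time

def rolling_restart(conn_ec2_as, group_name, pause_secs=120):
    # Terminate each instance without shrinking the group, so AutoScaling
    # launches a replacement before we move on to the next node.
    group = conn_ec2_as.get_all_groups([group_name])[0]
    for instance in group.instances:
        conn_ec2_as.terminate_instance(instance.instance_id,
                                       decrement_capacity=False)
        time.sleep(pause_secs)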
def add_ingress_rule(dry_run, go_agent_security_group, go_agent_security_group_owner, go_agent_security_group_name): """ For each ASG (app) in each VPC, add a rule to each SG associated with the ASG's launch configuration that allows SSH ingress from the GoCD agents' SG. BEFORE RUNNING THIS SCRIPT!: - Use the assume_role bash script to assume the role in the proper account/VPC (edx, edge, mckinsey, etc.) - If you don't know what this is, ask someone in DevOps. - THEN run this script. """ asg_conn = AutoScaleConnection() ec2_conn = boto.ec2.connect_to_region('us-east-1') asgs = [] launch_configs = {} security_groups = {} logging.debug('All ASGs:') for group in asg_conn.get_all_groups(): logging.debug(' {}'.format(group)) asgs.append(group) logging.debug('All launch configurations:') for launch_config in asg_conn.get_all_launch_configurations(): logging.debug(' {}'.format(launch_config)) launch_configs[launch_config.name] = launch_config logging.debug('All security groups:') for sec_group in ec2_conn.get_all_security_groups(): logging.debug(' {}'.format(sec_group)) security_groups[sec_group.id] = sec_group # Validate that each ASG has a launch configuration. for group in asgs: try: logging.info("Launch configuration for ASG '{}' is '{}'.".format( group.name, launch_configs[group.launch_config_name] )) except KeyError: logging.error("Launch configuration '{}' for ASG '{}' was not found!".format( group.launch_config_name, group.name )) raise # Construct a fake security group for the prod-tools-goagent-sg security group in the edx-tools account. # This group will be used to grant the go-agents ingress into the ASG's VPCs. go_agent_security_group = boto.ec2.securitygroup.SecurityGroup( name=go_agent_security_group_name, owner_id=go_agent_security_group_owner, id=go_agent_security_group ) # For each launch config, check for the security group. Can support multiple security groups # but the edX DevOps convention is to use a single security group. for group in asgs: launch_config = launch_configs[group.launch_config_name] if len(launch_config.security_groups) > 1: err_msg = "Launch config '{}' for ASG '{}' has more than one security group!: {}".format( launch_config.name, group.name, launch_config.security_groups ) logging.warning(err_msg) continue sg_name = launch_config.security_groups[0] try: # Find the security group. sec_group = security_groups[sg_name] except KeyError: logging.error("Security group '{}' for ASG '{}' was not found!".format(sg_name, group.name)) continue logging.info('BEFORE: Rules for security group {}:'.format(sec_group.name)) logging.info(sec_group.rules) try: # Add the ingress rule to the security group. yes_no = raw_input("Apply the change to this security group? [Yes]") if yes_no in ("", "y", "Y", "yes"): sec_group.authorize( ip_protocol='tcp', from_port=22, to_port=22, src_group=go_agent_security_group, dry_run=dry_run ) except boto.exception.EC2ResponseError as exc: if exc.status == 412: # If the dry_run flag is set, then each rule addition will raise this exception. # Log it and carry on. logging.info('Dry run is True but rule addition would have succeeded for security group {}.'.format( sg_name )) elif exc.code == "InvalidPermission.Duplicate": logging.info("Rule already exists for {}.".format(sg_name)) else: raise logging.info('AFTER: Rules for security group {}:'.format(sg_name)) logging.info(sec_group.rules)
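A hypothetical invocation, with placeholder IDs, showing how the dry-run path exercises the 412 handling above; only the SG name comes from the script's own comments.

add_ingress_rule(dry_run=True,
                 go_agent_security_group='sg-0123abcd',        # placeholder SG id
                 go_agent_security_group_owner='123456789012',  # placeholder account id
                 go_agent_security_group_name='prod-tools-goagent-sg')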
class EbsHelper(object): """ Class for helping with ebs """ def __init__( self, aws, wait_time_secs, app_name=None, ): """ Creates the EbsHelper """ self.aws = aws self.ebs = connect_to_region(aws.region, aws_access_key_id=aws.access_key, aws_secret_access_key=aws.secret_key, security_token=aws.security_token) self.autoscale = AutoScaleConnection( aws_access_key_id=aws.access_key, aws_secret_access_key=aws.secret_key, security_token=aws.security_token) self.s3 = S3Connection( aws_access_key_id=aws.access_key, aws_secret_access_key=aws.secret_key, security_token=aws.security_token, host=(lambda r: 's3.amazonaws.com' if r == 'us-east-1' else 's3-' + r + '.amazonaws.com')( aws.region)) self.app_name = app_name self.wait_time_secs = wait_time_secs def swap_environment_cnames(self, from_env_name, to_env_name): """ Swaps cnames for an environment """ self.ebs.swap_environment_cnames( source_environment_name=from_env_name, destination_environment_name=to_env_name) def upload_archive(self, filename, key, auto_create_bucket=True): """ Uploads an application archive version to s3 """ try: bucket = self.s3.get_bucket(self.aws.bucket) if ((self.aws.region != 'us-east-1' and self.aws.region != 'eu-west-1') and bucket.get_location() != self.aws.region) or ( self.aws.region == 'us-east-1' and bucket.get_location() != '') or ( self.aws.region == 'eu-west-1' and bucket.get_location() != 'eu-west-1'): raise Exception("Existing bucket doesn't match region") except S3ResponseError: bucket = self.s3.create_bucket(self.aws.bucket, location=self.aws.region) def __report_upload_progress(sent, total): if not sent: sent = 0 if not total: total = 1 # guard: avoid dividing by zero below out("Uploaded " + str(sent) + " bytes of " + str(total) \ + " (" + str(int(float(max(1, sent)) / float(total) * 100)) + "%)") # upload the new version k = Key(bucket) k.key = self.aws.bucket_path + key k.set_metadata('time', str(time())) k.set_contents_from_filename(filename, cb=__report_upload_progress, num_cb=10) def list_available_solution_stacks(self): """ Returns a list of available solution stacks """ stacks = self.ebs.list_available_solution_stacks() return stacks['ListAvailableSolutionStacksResponse'][ 'ListAvailableSolutionStacksResult']['SolutionStacks'] def create_application(self, description=None): """ Creates an application using the helper's current app_name """ out("Creating application " + str(self.app_name)) self.ebs.create_application(self.app_name, description=description) def delete_application(self): """ Deletes the application referenced by the helper's current app_name """ out("Deleting application " + str(self.app_name)) self.ebs.delete_application(self.app_name, terminate_env_by_force=True) def application_exists(self): """ Returns whether or not the given app_name exists """ response = self.ebs.describe_applications( application_names=[self.app_name]) return len(response['DescribeApplicationsResponse'] ['DescribeApplicationsResult']['Applications']) > 0 def create_environment(self, env_name, version_label=None, solution_stack_name=None, cname_prefix=None, description=None, option_settings=None, tier_name='WebServer', tier_type='Standard', tier_version='1.1'): """ Creates a new environment """ out("Creating environment: " + str(env_name) + ", tier_name:" + str(tier_name) + ", tier_type:" + str(tier_type)) self.ebs.create_environment(self.app_name, env_name, version_label=version_label, solution_stack_name=solution_stack_name, cname_prefix=cname_prefix, description=description,
option_settings=option_settings, tier_type=tier_type, tier_name=tier_name, tier_version=tier_version) def environment_exists(self, env_name, include_deleted=False): """ Returns whether or not the given environment exists """ response = self.ebs.describe_environments( application_name=self.app_name, environment_names=[env_name], include_deleted=include_deleted) return len(response['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments']) > 0 \ and response['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments'][0][ 'Status'] != 'Terminated' def environment_resources(self, env_name): """ Returns the description for the given environment's resources """ resp = self.ebs.describe_environment_resources( environment_name=env_name) return resp['DescribeEnvironmentResourcesResponse'][ 'DescribeEnvironmentResourcesResult']['EnvironmentResources'] def get_env_sizing_metrics(self, env_name): asg = self.get_asg(env_name) if asg: return asg.min_size, asg.max_size, asg.desired_capacity else: return None, None, None def get_asg(self, env_name): asg_name = self.get_asg_name(env_name) asgs = self.autoscale.get_all_groups(names=[asg_name]) asg = None if asgs: asg = asgs[0] return asg def get_asg_name(self, env_name): resources = self.environment_resources(env_name) name = resources["AutoScalingGroups"][0]["Name"] return name def set_env_sizing_metrics(self, env_name, min_size, max_size): self.update_environment(env_name, option_settings=[("aws:autoscaling:asg", "MinSize", min_size), ("aws:autoscaling:asg", "MaxSize", max_size)]) def environment_data(self, env_name): """ Returns the description for the given environment """ response = self.ebs.describe_environments( application_name=self.app_name, environment_names=[env_name], include_deleted=False) return response['DescribeEnvironmentsResponse'][ 'DescribeEnvironmentsResult']['Environments'][0] def rebuild_environment(self, env_name): """ Rebuilds an environment """ out("Rebuilding " + str(env_name)) self.ebs.rebuild_environment(environment_name=env_name) def get_environments(self): """ Returns the environments """ response = self.ebs.describe_environments( application_name=self.app_name, include_deleted=False) return response['DescribeEnvironmentsResponse'][ 'DescribeEnvironmentsResult']['Environments'] def delete_environment(self, environment_name): """ Deletes an environment """ self.ebs.terminate_environment(environment_name=environment_name, terminate_resources=True) def update_environment(self, environment_name, description=None, option_settings=[], tier_type=None, tier_name=None, tier_version='1.0'): """ Updates an environment """ out("Updating environment: " + str(environment_name)) messages = self.ebs.validate_configuration_settings( self.app_name, option_settings, environment_name=environment_name) messages = messages['ValidateConfigurationSettingsResponse'][ 'ValidateConfigurationSettingsResult']['Messages'] ok = True for message in messages: if message['Severity'] == 'error': ok = False out("[" + message['Severity'] + "] " + str(environment_name) + " - '" \ + message['Namespace'] + ":" + message['OptionName'] + "': " + message['Message']) # NB: validation problems are only logged; the update below proceeds even when ok is False self.ebs.update_environment(environment_name=environment_name, description=description, option_settings=option_settings, tier_type=tier_type, tier_name=tier_name, tier_version=tier_version) def get_previous_environment_for_subdomain(self, env_subdomain): """ Returns an environment name for the given cname """ def sanitize_subdomain(subdomain): return
subdomain.lower() env_subdomain = sanitize_subdomain(env_subdomain) def match_cname(cname): subdomain = sanitize_subdomain(cname.split(".")[0]) return subdomain == env_subdomain def match_candidate(env): return env['Status'] != 'Terminated' \ and env.get('CNAME') \ and match_cname(env['CNAME']) envs = self.get_environments() candidates = [env for env in envs if match_candidate(env)] match = None if candidates: match = candidates[0]["EnvironmentName"] return match def deploy_version(self, environment_name, version_label): """ Deploys a version to an environment """ out("Deploying " + str(version_label) + " to " + str(environment_name)) self.ebs.update_environment(environment_name=environment_name, version_label=version_label) def get_versions(self): """ Returns the versions available """ response = self.ebs.describe_application_versions( application_name=self.app_name) return response['DescribeApplicationVersionsResponse'][ 'DescribeApplicationVersionsResult']['ApplicationVersions'] def create_application_version(self, version_label, key): """ Creates an application version """ out("Creating application version " + str(version_label) + " for " + str(key)) self.ebs.create_application_version(self.app_name, version_label, s3_bucket=self.aws.bucket, s3_key=self.aws.bucket_path + key) def delete_unused_versions(self, versions_to_keep=10): """ Deletes unused versions """ # get versions in use environments = self.ebs.describe_environments( application_name=self.app_name, include_deleted=False) environments = environments['DescribeEnvironmentsResponse'][ 'DescribeEnvironmentsResult']['Environments'] versions_in_use = [] for env in environments: versions_in_use.append(env['VersionLabel']) # get all versions versions = self.ebs.describe_application_versions( application_name=self.app_name) versions = versions['DescribeApplicationVersionsResponse'][ 'DescribeApplicationVersionsResult']['ApplicationVersions'] versions = sorted( versions, reverse=True, cmp=lambda x, y: cmp(x['DateCreated'], y['DateCreated'])) # delete old versions that are not in use for version in versions[versions_to_keep:]: if version['VersionLabel'] in versions_in_use: out("Not deleting " + version["VersionLabel"] + " because it is in use") else: out("Deleting unused version: " + version["VersionLabel"]) self.ebs.delete_application_version( application_name=self.app_name, version_label=version['VersionLabel']) sleep(2) def describe_events(self, environment_name, next_token=None, start_time=None): """ Describes events from the given environment """ events = self.ebs.describe_events(application_name=self.app_name, environment_name=environment_name, next_token=next_token, start_time=start_time) return ( events['DescribeEventsResponse']['DescribeEventsResult']['Events'], events['DescribeEventsResponse']['DescribeEventsResult'] ['NextToken']) def wait_for_environments(self, environment_names, health=None, status=None, version_label=None, include_deleted=True, use_events=True): """ Waits for an environment to have the given version_label and to be in the green state """ # turn into a list if not isinstance(environment_names, (list, tuple)): environment_names = [environment_names] environment_names = environment_names[:] # print some stuff s = "Waiting for environment(s) " + ( ", ".join(environment_names)) + " to" if health is not None: s += " have health " + health else: s += " have any health" if version_label is not None: s += " and have version " + version_label if status is not None: s += " and have status " + status out(s) started = time()
seen_events = list() for env_name in environment_names: (events, next_token) = self.describe_events(env_name, start_time=utcnow_isoformat()) for event in events: seen_events.append(event) delay = 10 while True: # bail if they're all good if len(environment_names) == 0: break # wait sleep(delay) # # get the env try: environments = self.ebs.describe_environments( application_name=self.app_name, environment_names=environment_names, include_deleted=include_deleted) except BotoServerError as e: if not e.error_code == 'Throttling': raise delay = min(60, int(delay * 1.5)) out("Throttling: setting delay to " + str(delay) + " seconds") continue environments = environments['DescribeEnvironmentsResponse'][ 'DescribeEnvironmentsResult']['Environments'] if len(environments) <= 0: raise Exception("Couldn't find any environments") # loop through and wait for env in environments[:]: env_name = env['EnvironmentName'] # the message msg = "Environment " + env_name + " is " + str(env['Health']) if version_label is not None: msg = msg + " and has version " + str(env['VersionLabel']) if status is not None: msg = msg + " and has status " + str(env['Status']) # what we're doing good_to_go = True if health is not None: good_to_go = good_to_go and str(env['Health']) == health if status is not None: good_to_go = good_to_go and str(env['Status']) == status if version_label is not None: good_to_go = good_to_go and str( env['VersionLabel']) == version_label # allow a certain number of Red samples before failing if env['Status'] == 'Ready' and env['Health'] == 'Red': if 'RedCount' not in env: env['RedCount'] = 0 env['RedCount'] += 1 if env['RedCount'] > MAX_RED_SAMPLES: out('Deploy failed') raise Exception('Ready and red') # log it if good_to_go: out(msg + " ... done") environment_names.remove(env_name) else: out(msg + " ... waiting") # log events try: (events, next_token) = self.describe_events( env_name, start_time=utcnow_isoformat()) except BotoServerError as e: if not e.error_code == 'Throttling': raise delay = min(60, int(delay * 1.5)) out("Throttling: setting delay to " + str(delay) + " seconds") break for event in events: if event not in seen_events: out("[" + event['Severity'] + "] " + event['Message']) seen_events.append(event) # check the time elapsed = time() - started if elapsed > self.wait_time_secs: message = "Wait time for environment(s) {environments} to be {health} expired".format( environments=" and ".join(environment_names), health=(health or "Green")) raise Exception(message)
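A short end-to-end deploy sketch with the helper above. The application, environment, and version names are hypothetical, and `aws` stands for whatever credentials object the constructor expects.

helper = EbsHelper(aws, wait_time_secs=900, app_name='my-app')
helper.upload_archive('build/my-app.zip', 'v42.zip')        # pushed to the configured S3 bucket
helper.create_application_version('v42', 'v42.zip')
helper.deploy_version('my-app-env', version_label='v42')
helper.wait_for_environments('my-app-env', health='Green',
                             status='Ready', version_label='v42')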
class Cloud(object): def __init__(self, cloud_config): self.config = cloud_config self.all_instances = [] self.failed_launch = False self.failed_count = 0 self.failed_last_valid_count = 0 self._conn = None self._as_conn = None self._lc = None self._asg = None self._last_asg_launch_attempt = None self.maxed = False self._last_launch_attempt = datetime.datetime.utcnow() self._initialize() def _create_connection(self): LOG.debug("Creating connection for %s" % self.config.name) self._conn = boto.connect_ec2(self.config.access_id, self.config.secret_key, validate_certs=False) self._conn.host = self.config.cloud_uri self._conn.port = self.config.cloud_port def _create_autoscale_connection(self): LOG.debug("Creating autoscale connection for %s" % self.config.name) region = RegionInfo(name=self.config.cloud_type, endpoint=self.config.as_uri) self._as_conn = AutoScaleConnection( aws_access_key_id=self.config.access_id, aws_secret_access_key=self.config.secret_key, is_secure=True, port=self.config.as_port, region=region, validate_certs=False) def _create_or_set_launch_configuration(self): name = self.config.lc_name if not self._lc: LOG.debug("Attempting to load launch configuration: %s" % (name)) lc = self._as_conn.get_all_launch_configurations(names=[name]) if len(lc) == 1: LOG.debug("Launch configuration %s found." % (name)) self._lc = lc[0] if not self._lc: #TODO(pdmars): key and security groups are hardcoded for now, gross if self.config.user_data_file is not None: user_data_file = self.config.user_data_file with open(user_data_file) as f: user_data = f.read() else: user_data = None LOG.debug("Creating launch configuration %s" % name) LOG.debug("\tname: %s" % name) LOG.debug("\timage_id: %s" % self.config.image_id) LOG.debug("\tinstance_type: %s" % self.config.instance_type) LOG.debug("\tuser_data: %s" % user_data) self._lc = LaunchConfiguration( name=name, image_id=self.config.image_id, key_name="phantomkey", security_groups=['default'], instance_type=self.config.instance_type, user_data=user_data) self._as_conn.create_launch_configuration(self._lc) def _create_or_set_autoscale_group(self): name = self.config.asg_name if not self._asg: LOG.debug("Attempting to load autoscale group: %s" % name) asg = self._as_conn.get_all_groups(names=[name]) LOG.debug("Autoscale group: %s" % asg) if len(asg) == 1: LOG.debug("Autoscale group %s found." 
% name) self._asg = asg[0] if not self._asg: # TODO(pdmars): more hard coded grossness, for now try: cloud_guess = self.config.lc_name.split("@")[1].strip() except Exception as e: LOG.warn("Unable to guess cloud for auto scale tags") LOG.warn("Setting cloud to hotel") cloud_guess = "hotel" policy_name_key = "PHANTOM_DEFINITION" policy_name = "error_overflow_n_preserving" ordered_clouds_key = "clouds" n_preserve_key = "minimum_vms" ordered_clouds = cloud_guess + ":-1" n_preserve = 0 policy_tag = Tag(connection=self._as_conn, key=policy_name_key, value=policy_name, resource_id=name) clouds_tag = Tag(connection=self._as_conn, key=ordered_clouds_key, value=ordered_clouds, resource_id=name) npreserve_tag = Tag(connection=self._as_conn, key=n_preserve_key, value=n_preserve, resource_id=name) tags = [policy_tag, clouds_tag, npreserve_tag] zones = [self.config.az] LOG.debug("Creating autoscale group %s" % name) LOG.debug("\tname: %s" % name) LOG.debug("\tavailability_zones: %s" % zones) LOG.debug("\tlaunch_config: %s" % self._lc) self._asg = AutoScalingGroup(group_name=name, availability_zones=zones, min_size=0, max_size=0, launch_config=self._lc, tags=tags) self._as_conn.create_auto_scaling_group(self._asg) def _initialize(self): LOG.debug("Initializing %s" % self.config.name) self._create_connection() self._create_autoscale_connection() self._create_or_set_launch_configuration() self._create_or_set_autoscale_group() LOG.debug("Initialization complete for %s" % self.config.name) def get_valid_instances(self): return self.all_instances def _refresh_instances(self): LOG.debug("%s: getting instance information" % self.config.name) self.all_instances = [] instances = [] as_instances = self._asg.instances as_instance_ids = [i.instance_id for i in as_instances] reservations = self._conn.get_all_instances() for reservation in reservations: for instance in reservation.instances: if instance.id in as_instance_ids: if instance.state in VALID_RUN_STATES: instances.append(instance) for instance in instances: self.all_instances.append(instance) num_instances = len(self.all_instances) LOG.debug("%s: updated %d instances" % (self.config.name, num_instances)) if num_instances >= self.config.max_instances: LOG.warn("%s reached the max (%s) instances: %s" % ( self.config.name, self.config.max_instances, num_instances)) self.maxed = True else: self.maxed = False def _refresh_asg(self): LOG.debug("%s: refreshing autoscale group" % self.config.name) asg_name = self.config.asg_name asgs = self._as_conn.get_all_groups(names=[asg_name]) if len(asgs) == 1: self._asg = asgs[0] LOG.debug("\trefreshed autoscale group: %s" % asg_name) else: LOG.warn("\tunable to refresh autoscale group: %s" % asg_name) def refresh(self, cluster): self._refresh_asg() self._refresh_instances() def get_total_num_valid_cores(self): LOG.debug("%s: getting number of valid cores" % self.config.name) num_valid_instances = len(self.get_valid_instances()) total_valid_cores = num_valid_instances * self.config.instance_cores num_desired_instances = self._asg.desired_capacity num_desired_cores = num_desired_instances * self.config.instance_cores if num_desired_cores != total_valid_cores: LOG.debug("\tmismatching core counts") LOG.debug("\tnum_desired_cores: %d" % (num_desired_cores)) LOG.debug("\ttotal_valid_cores: %d" % (total_valid_cores)) return total_valid_cores def get_instance_by_id(self, id): LOG.debug("Searching for instance %s" % id) for instance in self.all_instances: if instance.id == id: LOG.debug("Found
instance %s" % id) return instance return None def get_instance_ids_for_public_dns_names(self, public_dns_names): instance_ids = [] for instance in self.all_instances: if instance.public_dns_name in public_dns_names: instance_ids.append(instance.id) return instance_ids def get_public_dns_names_close_to_charge(self): instances_close_to_charge = [] sleep_secs = self.config.get_loop_sleep_secs() cur_utc_time = datetime.datetime.utcnow() valid_instances = self.get_valid_instances() time_fmt = "%Y-%m-%dT%H:%M:%S.%fZ" for instance in valid_instances: launch_time = datetime.datetime.strptime(instance.launch_time, time_fmt) time_diff = cur_utc_time - launch_time # Ignores microseconds time_diff_secs = time_diff.seconds + time_diff.days * 24 * 3600 cur_charge_secs = time_diff_secs % self.config.charge_time_secs secs_to_charge = self.config.charge_time_secs - cur_charge_secs LOG.debug("%s:%s: charge: %d; current: %d; to charge: %d" % ( instance.id, instance.public_dns_name, self.config.charge_time_secs, cur_charge_secs, secs_to_charge)) if secs_to_charge < (3 * sleep_secs): instances_close_to_charge.append(instance.public_dns_name) return instances_close_to_charge def delete_instances(self, instance_ids=[]): if not instance_ids: return LOG.debug("Deleting instances: %s" % instance_ids) # TODO(pdmars): this has the potential to kill instances running jobs # maybe I should err on the side of having extra instances if the # capacity is higher than the cloud can currently support num_instances = len(self.all_instances) if ((self._asg.desired_capacity > num_instances) and (num_instances > 0)): LOG.warn("Desired capacity is greater than num_instances running") LOG.warn("Adjusting desired capacity to match") self.set_capacity(num_instances) for instance_id in instance_ids: self._as_conn.terminate_instance(instance_id) # TODO(pdmars): due to a bug in phantom, maybe this will help # 2013/04/05: this might not be relevant anymore time.sleep(.1) def launch_autoscale_instances(self, num_instances=1): new_capacity = self._asg.desired_capacity + int(num_instances) if new_capacity > self.config.max_instances: new_capacity = self.config.max_instances LOG.warn("%s can launch %s total instances" % (self.config.name, new_capacity)) self._last_launch_attempt = datetime.datetime.utcnow() LOG.debug("Setting cloud capacity for %s to %s" % (self.config.name, new_capacity)) self.set_capacity(new_capacity) def set_capacity(self, new_capacity): self._asg.set_capacity(new_capacity)
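The charge-window arithmetic in get_public_dns_names_close_to_charge is easiest to see with concrete numbers. For hourly billing (charge_time_secs = 3600) and an instance that has been up 7,300 seconds:

charge_time_secs = 3600
time_diff_secs = 7300                                # uptime: 2 hours plus 100 s
cur_charge_secs = time_diff_secs % charge_time_secs  # 100 s into the third hour
secs_to_charge = charge_time_secs - cur_charge_secs  # 3500 s until the next charge
# With a 60 s loop sleep, 3500 >= 3 * 60, so this node is NOT close to a charge
# boundary and would not be offered up for termination yet.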
class BotoScaleInterface(ScaleInterface): conn = None saveclcdata = False def __init__(self, clc_host, access_id, secret_key, token): #boto.set_stream_logger('foo') path = '/services/AutoScaling' reg = RegionInfo(name='eucalyptus', endpoint=clc_host) port = 8773 if clc_host.endswith('amazonaws.com'): clc_host = clc_host.replace('ec2', 'autoscaling', 1) path = '/' reg = RegionInfo(endpoint=clc_host) # use the rewritten autoscaling endpoint instead of discarding it port = 443 self.conn = AutoScaleConnection(access_id, secret_key, region=reg, port=port, path=path, is_secure=True, security_token=token, debug=0) self.conn.APIVersion = '2011-01-01' if not clc_host.endswith('amazonaws.com'): self.conn.auth_region_name = 'Eucalyptus' self.conn.https_validate_certificates = False self.conn.http_connection_kwargs['timeout'] = 30 def __save_json__(self, obj, name): f = open(name, 'w') json.dump(obj, f, cls=BotoJsonScaleEncoder, indent=2) f.close() ## # autoscaling methods ## def create_auto_scaling_group(self, as_group): return self.conn.create_auto_scaling_group(as_group) def delete_auto_scaling_group(self, name, force_delete=False): return self.conn.delete_auto_scaling_group(name, force_delete) def get_all_groups(self, names=None, max_records=None, next_token=None): obj = self.conn.get_all_groups(names, max_records, next_token) if self.saveclcdata: self.__save_json__(obj, "mockdata/AS_Groups.json") return obj def get_all_autoscaling_instances(self, instance_ids=None, max_records=None, next_token=None): obj = self.conn.get_all_autoscaling_instances(instance_ids, max_records, next_token) if self.saveclcdata: self.__save_json__(obj, "mockdata/AS_Instances.json") return obj def set_desired_capacity(self, group_name, desired_capacity, honor_cooldown=False): group = self.conn.get_all_groups([group_name])[0] # notice, honor_cooldown not supported.
return group.set_capacity(desired_capacity) def set_instance_health(self, instance_id, health_status, should_respect_grace_period=True): return self.conn.set_instance_health(instance_id, health_status, should_respect_grace_period) def terminate_instance(self, instance_id, decrement_capacity=True): return self.conn.terminate_instance(instance_id, decrement_capacity) def update_autoscaling_group(self, as_group): as_group.connection = self.conn return as_group.update() def create_launch_configuration(self, launch_config): return self.conn.create_launch_configuration(launch_config) def delete_launch_configuration(self, launch_config_name): return self.conn.delete_launch_configuration(launch_config_name) def get_all_launch_configurations(self, config_names=None, max_records=None, next_token=None): obj = self.conn.get_all_launch_configurations(names=config_names, max_records=max_records, next_token=next_token) if self.saveclcdata: self.__save_json__(obj, "mockdata/AS_LaunchConfigs.json") return obj # policy related def delete_policy(self, policy_name, autoscale_group=None): return self.conn.delete_policy(policy_name, autoscale_group) def get_all_policies(self, as_group=None, policy_names=None, max_records=None, next_token=None): return self.conn.get_all_policies(as_group, policy_names, max_records, next_token) def execute_policy(self, policy_name, as_group=None, honor_cooldown=None): return self.conn.execute_policy(policy_name, as_group, honor_cooldown) def create_scaling_policy(self, scaling_policy): return self.conn.create_scaling_policy(scaling_policy) def get_all_adjustment_types(self): return self.conn.get_all_adjustment_types() # tag related def delete_tags(self, tags): return self.conn.delete_tags(tags) def get_all_tags(self, filters=None, max_records=None, next_token=None): return self.conn.get_all_tags(filters, max_records, next_token) def create_or_update_tags(self, tags): return self.conn.create_or_update_tags(tags)
def test_basic(self): # NB: as it says on the tin these are really basic tests that only # (lightly) exercise read-only behaviour - and that's only if you # have any autoscale groups to introspect. It's useful, however, to # catch simple errors print('--- running %s tests ---' % self.__class__.__name__) c = AutoScaleConnection() self.assertTrue(repr(c).startswith('AutoScaleConnection')) groups = c.get_all_groups() for group in groups: self.assertIsInstance(group, AutoScalingGroup) # get activities activities = group.get_activities() for activity in activities: self.assertIsInstance(activity, Activity) # get launch configs configs = c.get_all_launch_configurations() for config in configs: self.assertIsInstance(config, LaunchConfiguration) # get policies policies = c.get_all_policies() for policy in policies: self.assertIsInstance(policy, ScalingPolicy) # get scheduled actions actions = c.get_all_scheduled_actions() for action in actions: self.assertIsInstance(action, ScheduledUpdateGroupAction) # get instances instances = c.get_all_autoscaling_instances() for instance in instances: self.assertIsInstance(instance, Instance) # get all scaling process types ptypes = c.get_all_scaling_process_types() for ptype in ptypes: self.assertIsInstance(ptype, ProcessType) # get adjustment types adjustments = c.get_all_adjustment_types() for adjustment in adjustments: self.assertIsInstance(adjustment, AdjustmentType) # get metrics collection types types = c.get_all_metric_collection_types() self.assertIsInstance(types, MetricCollectionTypes) # create the simplest possible AutoScale group # first create the launch configuration time_string = '%d' % int(time.time()) lc_name = 'lc-%s' % time_string lc = LaunchConfiguration(name=lc_name, image_id='ami-2272864b', instance_type='t1.micro') c.create_launch_configuration(lc) found = False lcs = c.get_all_launch_configurations() for lc in lcs: if lc.name == lc_name: found = True break assert found # now create autoscaling group group_name = 'group-%s' % time_string group = AutoScalingGroup(name=group_name, launch_config=lc, availability_zones=['us-east-1a'], min_size=1, max_size=1) c.create_auto_scaling_group(group) found = False groups = c.get_all_groups() for group in groups: if group.name == group_name: found = True break assert found # now create a tag tag = Tag(key='foo', value='bar', resource_id=group_name, propagate_at_launch=True) c.create_or_update_tags([tag]) found = False tags = c.get_all_tags() for tag in tags: if tag.resource_id == group_name and tag.key == 'foo': found = True break assert found c.delete_tags([tag]) # shutdown instances and wait for them to disappear group.shutdown_instances() instances = True while instances: time.sleep(5) groups = c.get_all_groups() for group in groups: if group.name == group_name: if not group.instances: instances = False group.delete() lc.delete() found = True while found: found = False time.sleep(5) tags = c.get_all_tags() for tag in tags: if tag.resource_id == group_name and tag.key == 'foo': found = True assert not found print('--- tests completed ---')
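The shutdown loops at the end of this test can hang forever if AWS misbehaves. A bounded polling helper such as this sketch (names are illustrative) keeps the test from wedging:

import time

def wait_until(predicate, timeout_secs=300, interval_secs=5):
    # Poll until predicate() is truthy, failing the test after the timeout.
    deadline = time.time() + timeout_secs
    while time.time() < deadline:
        if predicate():
            return
        time.sleep(interval_secs)
    raise AssertionError('timed out after %s seconds' % timeout_secs)

For example, wait_until(lambda: not c.get_all_groups(names=[group_name])[0].instances) could replace the open-ended instance-drain loop above.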
def get_asg_connection(): conn = AutoScaleConnection() # issue a minimal request so bad credentials fail here rather than later conn.get_all_groups(max_records=1) return conn
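get_all_groups returns at most max_records groups per call, so listing everything means following the pagination token. A paging sketch, assuming boto attaches next_token to the returned result set as it does for other autoscale listings:

def iter_all_groups(conn, page_size=50):
    # Yield every ASG, following next_token until the API stops returning one.
    token = None
    while True:
        page = conn.get_all_groups(max_records=page_size, next_token=token)
        for group in page:
            yield group
        token = getattr(page, 'next_token', None)
        if not token:
            break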
def create_autoscaling(ami_id, sns_arn): """ Creates the autoscaling group for proxy instances Inspired by boto autoscaling tutorial. """ con = AutoScaleConnection(aws_secret_access_key=AWS_SECRET_ACCESS_KEY, aws_access_key_id=AWS_ACCESS_KEY, region=RegionInfo(name=REGION, endpoint='autoscaling.%s.amazonaws.com' % REGION)) print "Creating autoscaling configuration..." config = LaunchConfiguration(name=AUTOSCALING_GROUP_NAME, image_id=ami_id, key_name=KEY_NAME, security_groups=[EC2_SECURITY_GROUP_NAME], instance_type=INSTANCE_TYPE) con.create_launch_configuration(config) print "Creating autoscaling group..." ag = AutoScalingGroup(name=AUTOSCALING_GROUP_NAME, launch_config=config, availability_zones=["{0}a".format(REGION)], load_balancers=[ELB_NAME], min_size=AUTOSCALING_MIN_INSTANCES, max_size=AUTOSCALING_MAX_INSTANCES) con.create_auto_scaling_group(ag) # fetch the autoscale group after it is created (unused but may be necessary) _ = con.get_all_groups(names=[AUTOSCALING_GROUP_NAME])[0] # Create tag name for autoscaling-created machines as_tag = Tag(key='Name', value=AUTOSCALING_GROUP_NAME, propagate_at_launch=True, resource_id=AUTOSCALING_GROUP_NAME) con.create_or_update_tags([as_tag]) print "Creating autoscaling policy..." scaleup_policy = ScalingPolicy(name='scale_up', adjustment_type='ChangeInCapacity', as_name=AUTOSCALING_GROUP_NAME, scaling_adjustment=1, cooldown=AUTOSCALING_COOLDOWN_PERIOD) scaledown_policy = ScalingPolicy(name='scale_down', adjustment_type='ChangeInCapacity', as_name=AUTOSCALING_GROUP_NAME, scaling_adjustment=-1, cooldown=AUTOSCALING_COOLDOWN_PERIOD) con.create_scaling_policy(scaleup_policy) con.create_scaling_policy(scaledown_policy) # Get freshened policy objects scaleup_policy = con.get_all_policies(as_group=AUTOSCALING_GROUP_NAME, policy_names=['scale_up'])[0] scaledown_policy = con.get_all_policies(as_group=AUTOSCALING_GROUP_NAME, policy_names=['scale_down'])[0] print "Creating cloudwatch alarms" cloudwatch_con = CloudWatchConnection(aws_secret_access_key=AWS_SECRET_ACCESS_KEY, aws_access_key_id=AWS_ACCESS_KEY, region=RegionInfo(name=REGION, endpoint='monitoring.%s.amazonaws.com' % REGION)) alarm_dimensions = {"AutoScalingGroupName": AUTOSCALING_GROUP_NAME} scaleup_alarm = MetricAlarm(name='scale_up_on_cpu', namespace='AWS/EC2', metric='CPUUtilization', statistic='Average', comparison='>', threshold=AUTOSCALING_CPU_MAX_THRESHOLD, period='60', evaluation_periods=1, alarm_actions=[scaleup_policy.policy_arn, sns_arn], dimensions=alarm_dimensions) # Don't send SNS on scaledown policy scaledown_alarm = MetricAlarm(name='scale_down_on_cpu', namespace='AWS/EC2', metric='CPUUtilization', statistic='Average', comparison='<', threshold=AUTOSCALING_CPU_MIN_THRESHOLD, period='60', evaluation_periods=1, alarm_actions=[scaledown_policy.policy_arn], dimensions=alarm_dimensions) cloudwatch_con.create_alarm(scaleup_alarm) cloudwatch_con.create_alarm(scaledown_alarm)
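A rough inverse of create_autoscaling, sketched under the same module constants, for tearing the stack back down; order matters (alarms and policies first, then the group, then the launch configuration).

def delete_autoscaling():
    cw = CloudWatchConnection(aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                              aws_access_key_id=AWS_ACCESS_KEY,
                              region=RegionInfo(name=REGION,
                                                endpoint='monitoring.%s.amazonaws.com' % REGION))
    cw.delete_alarms(['scale_up_on_cpu', 'scale_down_on_cpu'])
    con = AutoScaleConnection(aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                              aws_access_key_id=AWS_ACCESS_KEY,
                              region=RegionInfo(name=REGION,
                                                endpoint='autoscaling.%s.amazonaws.com' % REGION))
    for policy_name in ('scale_up', 'scale_down'):
        con.delete_policy(policy_name, autoscale_group=AUTOSCALING_GROUP_NAME)
    # force_delete terminates any remaining instances along with the group
    con.delete_auto_scaling_group(AUTOSCALING_GROUP_NAME, force_delete=True)
    con.delete_launch_configuration(AUTOSCALING_GROUP_NAME)  # LC shares the group's name above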
def add_ingress_rule(dry_run, go_agent_security_group, go_agent_security_group_owner, go_agent_security_group_name): """ For each ASG (app) in each VPC, add a rule to each SG associated with the ASG's launch configuration that allows SSH ingress from the GoCD agents' SG. BEFORE RUNNING THIS SCRIPT!: - Use the assume_role bash script to assume the role in the proper account/VPC (edx, edge, mckinsey, etc.) - If you don't know what this is, ask someone in DevOps. - THEN run this script. """ asg_conn = AutoScaleConnection() ec2_conn = boto.ec2.connect_to_region('us-east-1') asgs = [] launch_configs = {} security_groups = {} logging.debug('All ASGs:') for group in asg_conn.get_all_groups(): logging.debug(' {}'.format(group)) asgs.append(group) logging.debug('All launch configurations:') for launch_config in asg_conn.get_all_launch_configurations(): logging.debug(' {}'.format(launch_config)) launch_configs[launch_config.name] = launch_config logging.debug('All security groups:') for sec_group in ec2_conn.get_all_security_groups(): logging.debug(' {}'.format(sec_group)) security_groups[sec_group.id] = sec_group # Validate that each ASG has a launch configuration. for group in asgs: try: logging.info("Launch configuration for ASG '{}' is '{}'.".format( group.name, launch_configs[group.launch_config_name] )) except KeyError: logging.error("Launch configuration '{}' for ASG '{}' was not found!".format( group.launch_config_name, group.name )) raise # Construct a fake security group for the prod-tools-goagent-sg security group in the edx-tools account. # This group will be used to grant the go-agents ingress into the ASG's VPCs. go_agent_security_group = boto.ec2.securitygroup.SecurityGroup( name=go_agent_security_group_name, owner_id=go_agent_security_group_owner, id=go_agent_security_group ) # For each launch config, check for the security group. Can support multiple security groups # but the edX DevOps convention is to use a single security group. for group in asgs: launch_config = launch_configs[group.launch_config_name] if len(launch_config.security_groups) > 1: err_msg = "Launch config '{}' for ASG '{}' has more than one security group!: {}".format( launch_config.name, group.name, launch_config.security_groups ) logging.warning(err_msg) continue sg_name = launch_config.security_groups[0] try: # Find the security group. sec_group = security_groups[sg_name] except KeyError: logging.error("Security group '{}' for ASG '{}' was not found!".format(sg_name, group.name)) continue logging.info('BEFORE: Rules for security group {}:'.format(sec_group.name)) logging.info(sec_group.rules) try: # Add the ingress rule to the security group. yes_no = six.moves.input("Apply the change to this security group? [Yes]") if yes_no in ("", "y", "Y", "yes"): sec_group.authorize( ip_protocol='tcp', from_port=22, to_port=22, src_group=go_agent_security_group, dry_run=dry_run ) except boto.exception.EC2ResponseError as exc: if exc.status == 412: # If the dry_run flag is set, then each rule addition will raise this exception. # Log it and carry on. logging.info('Dry run is True but rule addition would have succeeded for security group {}.'.format( sg_name )) elif exc.code == "InvalidPermission.Duplicate": logging.info("Rule already exists for {}.".format(sg_name)) else: raise logging.info('AFTER: Rules for security group {}:'.format(sg_name)) logging.info(sec_group.rules)
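For completeness, the matching rollback is a revoke call with the same arguments; a sketch reusing the objects from the function above:

sec_group.revoke(ip_protocol='tcp', from_port=22, to_port=22,
                 src_group=go_agent_security_group)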
class IcsAS(object): """ ICS Library for AutoScale """ def __init__(self, region, **kwargs): self.conn = AutoScaleConnection(region=get_region(region), **kwargs) def to_list(self, input): """ Normalize the input: pass lists (and None) through, wrap a single string into a list """ if input is None: return input elif isinstance(input, list): return input elif isinstance(input, basestring): return [input] else: raise IcsASException("Need the type '%s' but '%s' found" % ('list', type(input))) def get_group_name_from_instance(self, instance_id): """ Get the ASG name from the specific instance id :type instance_id: string :param instance_id: EC2 instance id starting with 'i-xxxxxxx' :rtype: string :return: name of the ASG, this instance belongs to """ instances = self.conn.get_all_autoscaling_instances( instance_ids=self.to_list(instance_id)) if instances: return instances[0].group_name else: return None def get_instances_from_group_name(self, name): """ Get the instances from the specific ASG name :type name: string :param name: the specific ASG name :rtype: list :return: a list contains all the instances """ instances = [] for group in self.conn.get_all_groups(names=self.to_list(name)): instances.extend(group.instances) return instances def get_group_from_name(self, name): """ Get the ASG from its name :type name: string :param name: the ASG name :rtype: list :return: a list represents the specific ASG(s) """ return self.conn.get_all_groups(names=self.to_list(name)) def get_launch_config_from_name(self, name): """ Get the Launch Configuration from its name :type name: string :param name: the Launch Configuration name :rtype: list :return: a list represents the specific Launch Configuration(s) """ return self.conn.get_all_launch_configurations( names=self.to_list(name)) def create_launch_config(self, launch_config): """ Create the Launch Configuration :type launch_config: class :param launch_config: boto launch_config object :rtype: string :return: AWS request Id """ return self.conn.create_launch_configuration(launch_config) def delete_launch_config_from_name(self, name): """ Delete the Launch Configuration from its name :type name: string :param name: the name of launch configuration :rtype: string :return: AWS request Id """ log.info("delete the launch configuration:") log.info(">> %s" % name) return self.conn.delete_launch_configuration(name) def update_launch_config(self, name, launch_config): """ Update the Launch Configuration for a specific ASG :type name: string :param name: the name of Auto-Scaling Group :type launch_config: class :param launch_config: boto launch_config object :rtype: string :return: AWS request Id """ groups = self.get_group_from_name(name) if groups: group = groups[0] else: raise IcsASException("no such Auto-Scaling Group '%s' found" % name) self.create_launch_config(launch_config) old_lc_name = group.launch_config_name new_lc_name = launch_config.name group.launch_config_name = new_lc_name group.update() if self.get_launch_config_from_name(new_lc_name): group = self.get_group_from_name(name)[0] if group.launch_config_name == new_lc_name: return self.delete_launch_config_from_name(old_lc_name) else: raise IcsASException("failed to update " + "launch config for ASG '%s'" % name) else: raise IcsASException("no such new launch config '%s'" % new_lc_name) def suspend_scaling_group(self, name, scaling_processes=None): """ Suspends Auto Scaling processes for an Auto Scaling group.
:type name: string :param name: the ASG name :type scaling_processes: string or list :param scaling_processes: scaling process names * Launch * Terminate * HealthCheck * ReplaceUnhealthy * AZRebalance * AlarmNotification * ScheduledActions * AddToLoadBalancer """ if not isinstance(name, basestring): return None group = self.get_group_from_name(self.to_list(name))[0] return group.suspend_processes(self.to_list(scaling_processes)) def resume_scaling_group(self, name, scaling_processes=None): """ Resumes Auto Scaling processes for an Auto Scaling group. :type name: string :param name: the ASG name :type scaling_processes: string or list :param scaling_processes: scaling process names * Launch * Terminate * HealthCheck * ReplaceUnhealthy * AZRebalance * AlarmNotification * ScheduledActions * AddToLoadBalancer """ if not isinstance(name, basestring): return None group = self.get_group_from_name(self.to_list(name))[0] return group.resume_processes(self.to_list(scaling_processes)) def terminate_group_instance(self, instance_id, decrement_capacity=True): """ Terminates the specified instance. The group size can optionally be decremented as well. :type instance_id: str :param instance_id: The ID of the instance to be terminated. :type decrement_capacity: bool :param decrement_capacity: Whether to decrement the size of the autoscaling group or not. """ return self.conn.terminate_instance( instance_id=instance_id, decrement_capacity=decrement_capacity) def update_instance_health(self, instance_id, health_status, grace_period=False): """ Explicitly set the health status of an instance. :type instance_id: str :param instance_id: The identifier of the EC2 instance :type health_status: str :param health_status: The health status of the instance. * Healthy: the instance is healthy and should remain in service. * Unhealthy: the instance is unhealthy. \ Auto Scaling should terminate and replace it. :type grace_period: bool :param grace_period: If True, this call should respect the grace period associated with the group. """ self.conn.set_instance_health(instance_id, health_status, should_respect_grace_period=grace_period)
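Typical use of the suspend/resume pair above is to bracket maintenance so Auto Scaling doesn't fight the operator; a sketch with a hypothetical group name:

as_helper = IcsAS('us-east-1')
as_helper.suspend_scaling_group('my-asg', ['Launch', 'Terminate'])
try:
    pass  # perform maintenance on the group's instances here
finally:
    as_helper.resume_scaling_group('my-asg')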
def setup(CONF): global out lookup_tbl = { 'name': CONF['NAME'], } conn = AutoScaleConnection() out['conn'] = conn # Launch Configurations LC = CONF['LC'] LC['name'] = LC['name'] % lookup_tbl lc = LaunchConfiguration(**LC) conn.create_launch_configuration(lc) out['lc'] = lc # Auto Scaling Group ASG = CONF['ASG'] ASG['group_name'] = ASG['group_name'] % lookup_tbl ASG['launch_config'] = lc groups = conn.get_all_groups(names=[ASG['group_name']]) if (len(groups) > 0): # update asg = groups[0] for k in ASG: # AutoScalingGroup is not a mapping; skip keys the object does not already have try: asg.__getattribute__(k) except AttributeError: continue asg.__setattr__(k, ASG[k]) asg.launch_config_name = LC['name'] asg.update() out['asg'] = asg else: #create asg = AutoScalingGroup(**ASG) conn.create_auto_scaling_group(asg) # ASG Tags ASG_TAGS = CONF['ASG_TAGS'] for i in ASG_TAGS: if 'propagate_at_launch' not in i: i['propagate_at_launch'] = True i['key'] = i['key'] % lookup_tbl i['value'] = i['value'] % lookup_tbl tags = [ Tag(**dict(x.items() + [('resource_id', ASG['group_name'])])) for x in ASG_TAGS ] conn.create_or_update_tags(tags) # Triggers (Scaling Policy / Cloudwatch Alarm) conn_cw = connect_to_region(CONF['REGION']) TRIGGERS = CONF['TRIGGERS'] for T in TRIGGERS: T['policy']['name'] = T['policy']['name'] % lookup_tbl T['policy']['as_name'] = ASG['group_name'] T['alarm']['dimensions'] = {'AutoScalingGroupName': ASG['group_name']} T['alarm']['alarm_actions'] = None if 'name' in T['alarm']: T['alarm']['name'] = T['alarm']['name'] % lookup_tbl else: T['alarm']['name'] = T['policy']['name'] # Policies are safely overwritten, so not checked for existence conn.create_scaling_policy(ScalingPolicy(**T['policy'])) policy = conn.get_all_policies(as_group=ASG['group_name'], policy_names=[T['policy']['name']])[0] T['alarm']['alarm_actions'] = [policy.policy_arn] # create_alarm overwrites an alarm of the same name, so no existence check is needed conn_cw.create_alarm(MetricAlarm(**T['alarm']))
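setup expects a CONF mapping whose string values may use %(name)s substitution. A hypothetical minimal configuration (every value here is an illustration, not a known-good AMI or size):

CONF = {
    'NAME': 'myapp',
    'REGION': 'us-east-1',
    'LC': {'name': '%(name)s-lc',
           'image_id': 'ami-12345678',   # placeholder AMI
           'instance_type': 'm1.small'},
    'ASG': {'group_name': '%(name)s-asg',
            'availability_zones': ['us-east-1a'],
            'min_size': 1, 'max_size': 2},
    'ASG_TAGS': [{'key': 'Name', 'value': '%(name)s'}],
    'TRIGGERS': [{
        'policy': {'name': '%(name)s-scale-up',
                   'adjustment_type': 'ChangeInCapacity',
                   'scaling_adjustment': 1, 'cooldown': 300},
        'alarm': {'metric': 'CPUUtilization', 'namespace': 'AWS/EC2',
                  'statistic': 'Average', 'comparison': '>',
                  'threshold': 70, 'period': 60, 'evaluation_periods': 2},
    }],
}
setup(CONF)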
def check_group_health(group_activities): all_healthy = False for g in group_activities: print g if int(g.progress) == 100: all_healthy = True else: all_healthy = False break return all_healthy #make connections autoscaling_conn = AutoScaleConnection(os.environ['AWS_ACCESS_KEY_ID'], os.environ['AWS_SECRET_ACCESS_KEY']) ec2_conn = boto.ec2.connect_to_region('us-east-1') #get the autoscaling groups a_groups = autoscaling_conn.get_all_groups() for group in a_groups: instances = get_instances_from_group(group) time.sleep(30) #wait until all groups have all healthy instances while True: all_healthy = False #go through all the groups for group in a_groups: #check if the group is ready group.update() #if the group is live if check_group_health(group.get_activities()): print "Group Passes" print group
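The loop above relies on a get_instances_from_group helper that this excerpt does not show. One plausible implementation, following the same pattern the Cloud class uses earlier, resolves the ASG's instance ids through the EC2 connection:

def get_instances_from_group(group):
    # Map the ASG's instance records to full EC2 instance objects.
    ids = [i.instance_id for i in group.instances]
    if not ids:
        return []
    reservations = ec2_conn.get_all_instances(instance_ids=ids)
    return [inst for res in reservations for inst in res.instances]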