def __init__(self, json_doc, required_stack_id=None):
    self.log = LogUtil.get_root_logger()
    self._instances = {'list': [], 'scaling': []}
    self._asg = boto3.client('autoscaling', InventoryConfig.region_name)
    self.name = json_doc['AutoScalingGroupName']
    self.private_ips = []
    self.node_hostdefs = []
    self.scale_override = True
    self.scaling_events = []
    self.scale_in_progress_instances = {'terminate': [], 'launch': []}
    self.cooldown = json_doc['DefaultCooldown']
    self.logical_name = None
    self._cooldown_upperlimit = self.cooldown * 3
    if self._cooldown_upperlimit >= 300:
        self._cooldown_upperlimit = 300
    self.elb_name = None
    self.stack_id = None
    for tag in self._grab_tags(json_doc['Tags']):
        self.__dict__[tag['key']] = tag['value']
    self.in_openshift_cluster = self._determine_cluster_membership()
    if self.in_openshift_cluster:
        # Set the logical_name
        self.logical_name = InventoryConfig.logical_names[self.logical_id]
        # Sanity check to verify they're in the API.
        # - and populate the InventoryConfig.all_instances dict as a result.
        # - working around edge cases.
        ilist = [i['InstanceId'] for i in json_doc['Instances']]
        InventoryScaling.wait_for_api(instance_id_list=ilist)
        # Grab instances
        for instance in self._grab_instance_metadata(json_doc['Instances']):
            self._instances[instance.InstanceId] = instance
            self._instances['list'].append(instance.InstanceId)
            self.private_ips += instance.private_ips
        # Grab scaling events - anything newer than (self.cooldown * 3).
        # However, only do so if we're not populating the initial inventory.
        if not InventoryConfig.generate_initial_inventory:
            for scaling_event in self._grab_current_scaling_events():
                self.scaling_events.append(scaling_event)
                # Skip launches already in the config. Done to compensate
                # for the self._cooldown_upperlimit var.
                if (scaling_event.type == 'launch') and (
                        scaling_event.instance in InventoryConfig.known_instances.keys()):
                    continue
                if (scaling_event.type == 'launch') and (
                        scaling_event.instance in self.scale_in_progress_instances['terminate']):
                    continue
                self.scale_in_progress_instances[scaling_event.type].append(scaling_event.instance)
                self._instances['scaling'].append(scaling_event.instance)
            for instance in self._instances['list']:
                if (instance not in InventoryConfig.known_instances.keys()) and (
                        instance not in self._instances['scaling']):
                    self.scale_in_progress_instances['launch'].append(instance)
                    self.scale_override = True
        self.openshift_config_category = self._determine_openshift_category(self.logical_id)
        # Grab Inventory host definitions
        for combined_hostdef in self.generate_asg_node_hostdefs():
            instance_id, hostdef = combined_hostdef
            self.node_hostdefs.append(hostdef)
            InventoryConfig.provisioning_hostdefs[instance_id] = hostdef
#!/usr/bin/env python
import argparse
import os
import subprocess
import tempfile
import shlex
import time
import sys

from aws_openshift_quickstart.utils import *
from aws_openshift_quickstart.logger import LogUtil

LogUtil.set_log_handler('/var/log/openshift-quickstart-scaling.log')
log = LogUtil.get_root_logger()


def generate_inital_inventory_nodes(write_hosts_to_temp=False):
    """
    Generates the initial ansible inventory. Instances only.
    """
    # TODO: Add debugging statements
    def _varsplit(filename):
        if not os.path.exists(filename):
            return {}
        if os.path.getsize(filename) == 1:
            return {}
        _vs = {}
        with open(filename, 'r') as f:
            varlines = f.readlines()
        for l in varlines:
            try:
                l_stripped = l.strip('\n')
                if l_stripped == '':
                    continue
                # The rest of this loop was truncated in the source; the
                # lines below are a reconstruction (assumption): each
                # non-blank line is treated as a key=value pair.
                k, v = l_stripped.split('=', 1)
                _vs[k] = v
            except ValueError:
                # No '=' on the line; skip it.
                continue
        return _vs
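
# --- Usage sketch (illustrative, not part of the Quick Start) ---
# A standalone version of the key=value parsing that _varsplit above is
# assumed to perform; the function name, file contents, and variable names
# below are made up for illustration.
def parse_vars_file(filename):
    parsed = {}
    with open(filename) as f:
        for line in f:
            stripped = line.strip('\n')
            if stripped == '':
                continue
            try:
                key, value = stripped.split('=', 1)
            except ValueError:
                continue  # no '=' on this line; skip it
            parsed[key] = value
    return parsed

# Given a file containing:
#   openshift_release=3.9
#   openshift_deployment_type=openshift-enterprise
# parse_vars_file() would return:
#   {'openshift_release': '3.9', 'openshift_deployment_type': 'openshift-enterprise'}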
def __init__(self, json_doc, version='3.9'):
    self.log = LogUtil.get_root_logger()
    self._instances = {'list': [], 'scaling': []}
    self._asg = boto3.client('autoscaling', InventoryConfig.region_name)
    self.name = json_doc['AutoScalingGroupName']
    self.private_ips = list()
    self.scaling_events = list()
    self.node_hostdefs = dict()
    self.scale_in_progress_instances = {'terminate': [], 'launch': []}
    self.cooldown = json_doc['DefaultCooldown']
    self._cooldown_upperlimit = self.cooldown * 3
    self.scale_override = False
    self.logical_name = None
    self.elb_name = None
    self.stack_id = None
    self.logical_id = None
    if self._cooldown_upperlimit <= 300:
        self._cooldown_upperlimit = 300
    for tag in self._grab_tags(json_doc['Tags']):
        self.__dict__[tag['key']] = tag['value']
    self.in_openshift_cluster = self._determine_cluster_membership()
    if self.in_openshift_cluster:
        self.openshift_config_category = self._determine_openshift_category(self.logical_id)
        # Set the logical_name
        self.logical_name = InventoryConfig.logical_names[self.logical_id]
        # Sanity check to verify they're in the API.
        # - and populate the InventoryConfig.all_instances dict as a result.
        # - working around edge cases.
        ilist = [i['InstanceId'] for i in json_doc['Instances']]
        InventoryScaling.wait_for_api(instance_id_list=ilist)
        # Grab instances
        for instance in self._grab_instance_metadata(json_doc['Instances']):
            self._instances[instance.InstanceId] = instance
            self._instances['list'].append(instance.InstanceId)
            self.private_ips += instance.private_ips
        # Grab scaling events. Anything newer than (self.cooldown * 3).
        # However, only do so if we're not populating the initial inventory.
        if not InventoryConfig.initial_inventory:
            for scaling_event in self._grab_current_scaling_events():
                self.scaling_events.append(scaling_event)
                # If the instance is not already in the config. Done to
                # compensate for the self._cooldown_upperlimit var.
                if (scaling_event.event_type == 'launch') and (
                        scaling_event.instance in InventoryConfig.known_instances.keys()):
                    continue
                if (scaling_event.event_type == 'launch') and (
                        scaling_event.instance in self.scale_in_progress_instances['terminate']):
                    continue
                self.scale_in_progress_instances[scaling_event.event_type].append(scaling_event.instance)
                self._instances['scaling'].append(scaling_event.instance)
            for instance in self._instances['list']:
                # Sanity check.
                # - If the instance is not in the known_instances list, or defined in a recent
                #   scaling event, but is in the ASG (we dont know about it otherwise)
                # -- Add it to the scale_in_progress list, and set scale_override to True,
                #    so a scale-up occurs. (See: scaler.scale_
                if (instance not in InventoryConfig.known_instances.keys()) and (
                        instance not in self._instances['scaling']):
                    self.scale_in_progress_instances['launch'].append(instance)
                    self.scale_override = True
        # Grab Inventory host definitions
        for combined_hostdef in self.generate_asg_node_hostdefs(version):
            instance_id, hostdef = combined_hostdef
            InventoryConfig.id_to_ip_map[instance_id] = hostdef['ip_or_dns']
            del hostdef['ip_or_dns']
            InventoryConfig.provisioning_hostdefs[instance_id] = hostdef
            self.node_hostdefs.update(hostdef)
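
# --- Usage sketch (illustrative assumptions) ---
# The enclosing class isn't named in this fragment; `ClusterGroup` below is
# a hypothetical stand-in. Each document returned by
# DescribeAutoScalingGroups would be wrapped individually, after
# InventoryConfig.setup() has established the region and stack tags:
import boto3

asg_client = boto3.client('autoscaling', InventoryConfig.region_name)
for asg_doc in asg_client.describe_auto_scaling_groups()['AutoScalingGroups']:
    group = ClusterGroup(asg_doc, version='3.9')  # hypothetical class name
    if not group.in_openshift_cluster:
        continue
    # Pending launches/terminations for this ASG are now separated out:
    print(group.name, group.scale_in_progress_instances)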
class InventoryScaling(object):
    """
    Class to facilitate scaling activities in the Cluster's Auto Scaling Groups.
    """
    log = LogUtil.get_root_logger()
    nodes_to_add = copy.deepcopy(InventoryConfig.asg_node_skel)
    nodes_to_remove = copy.deepcopy(InventoryConfig.asg_node_skel)
    nodes_to_add['combined'] = []
    nodes_to_remove['combined'] = []
    ansible_results = {}
    _client = None

    @classmethod
    def wait_for_api(cls, instance_id_list=None):
        """
        Waits for instances in (class).nodes_to_add to show up in
        DescribeInstances API calls, then adds them to the
        InventoryConfig.all_instances dictionary. This is necessary so the
        instances can be written to the Inventory config file.
        """
        if not instance_id_list:
            instance_id_list = cls.nodes_to_add['combined']
        cls.log.info("[wait_for_api]: Waiting for the EC2 API to return new instances.")
        cls._client = boto3.client('ec2', InventoryConfig.region_name)
        waiter = cls._client.get_waiter('instance_exists')
        waiter.wait(InstanceIds=instance_id_list)
        for instance in cls._fetch_newly_launched_instances_from_api(cls.nodes_to_add['combined']):
            cls.log.debug("[{}] has been detected in the API.".format(instance))
            InventoryConfig.all_instances[instance['InstanceId']] = instance
        cls.log.info("[wait_for_api] Complete")

    @classmethod
    def _fetch_newly_launched_instances_from_api(cls, instance_id_list):
        """
        Generator. Fetches the newly-launched instances from the API.
        """
        filters = [{'Name': 'instance-id', 'Values': instance_id_list}]
        all_instances = cls._client.describe_instances(Filters=filters)['Reservations']
        for reservation in all_instances:
            for instance in reservation['Instances']:
                yield instance

    @classmethod
    def process_pipeline(cls):
        """
        ClassMethod that:
        - prunes the config, removing nodes that are terminating.
        - adds nodes to the config that just launched.
        """
        cls.log.info("We're processing the scaling pipeline")
        # Remove the nodes (from config) that are terminating.
        if cls.nodes_to_remove['combined']:
            cls.log.info("We have the following nodes to remove from the inventory:")
            cls.log.info("{}".format(cls.nodes_to_remove['combined']))
            for category in cls.nodes_to_remove.keys():
                if category == 'combined':
                    continue
                # cls.nodes_to_remove[category] is a list of instance IDs.
                cls.remove_node_from_section(cls.nodes_to_remove[category], category)
        else:
            cls.log.info("No nodes were found to remove from the inventory.")
        # Add the nodes that are launching.
        if cls.nodes_to_add['combined']:
            cls.log.info("We have the following nodes to add to the inventory:")
            cls.log.info("{}".format(cls.nodes_to_add['combined']))
            for category in cls.nodes_to_add.keys():
                if category == 'combined':
                    continue
                cls.log.debug("Adding nodes {} to the {} category".format(
                    cls.nodes_to_add[category], category))
                cls.add_nodes_to_section(cls.nodes_to_add[category], category)
            cls.log.info("Complete!")
        else:
            cls.log.info("No nodes were found to add to the inventory.")

    @classmethod
    def add_nodes_to_section(cls, nodes, category, fluff=True, migrate=False):
        """
        Adds nodes (private IP / private DNS entry) to a config section.
        """
        acfg = InventoryConfig.ansible_host_cfg
        ic = InventoryConfig
        if not migrate:
            # nodes is a list of IPs here; it's a dict when migrate=True.
            if fluff:
                new_node_section = 'new_' + category
            else:
                new_node_section = category
            prov_sec = ic.inventory_categories['provision'][0]  # FIXME: account for dict.
            for n in nodes:
                if n in ic.known_instances.keys():
                    continue
                acfg[new_node_section].update(ic.provisioning_hostdefs[ic.ip_to_id_map[n]])
                acfg[prov_sec].update(ic.provisioning_hostdefs[ic.ip_to_id_map[n]])
        else:
            # dict passed by the migrate wrapper.
            acfg[category].update(nodes)

    @classmethod
    def remove_node_from_section(cls, node, category, migrate=False, use_migration_dict=True):
        """
        ClassMethod to remove a list of nodes from a list of categories
        within the config file.
        """
        migration_dict = {}
        categories = [category, '{}_{}'.format('new', category)]
        if migrate:
            # Leaving only new_{category}
            del categories[categories.index(category)]
            categories += InventoryConfig.inventory_categories['provision']
        for node_key in node:
            for cat in categories:
                try:
                    cls.log.info("Removing {} from category {}".format(node_key, cat))
                    if migrate and use_migration_dict:
                        migration_dict.update({
                            node_key: InventoryConfig.ansible_host_cfg[cat][node_key]
                        })
                    del InventoryConfig.ansible_host_cfg[cat][node_key]
                except KeyError:
                    cls.log.debug("{} wasn't present within {} after all.".format(node_key, cat))
        if migrate:
            return migration_dict

    @classmethod
    def migrate_nodes_between_section(cls, nodes, category, additional_add=None):
        """
        Wrapper to migrate successful nodes from the new_{category} label to
        the {category} label within the Ansible inventory. Additionally
        removes the nodes from the provisioning category.
        """
        if additional_add is None:
            additional_add = []
        cls.log.debug("migrate_nodes_between_section - nodes: %s category: %s additional_add: %s"
                      % (nodes, category, additional_add))
        add_dict = cls.remove_node_from_section(nodes, category, migrate=True)
        if 'master' in category:
            _ = cls.remove_node_from_section(nodes, 'nodes', migrate=True,
                                             use_migration_dict=False)
        cls.add_nodes_to_section(add_dict, category, migrate=True)
        for addcat in additional_add:
            cls.add_nodes_to_section(add_dict, addcat, migrate=True)
        cls.log.info("Nodes: {} have been permanently added to the Inventory under the {} category"
                     .format(nodes, category))
        cls.log.info("They've additionally been removed from the provision_in_progress category")

    @classmethod
    def process_playbook_json_output(cls, jout_file, category):
        """
        Processes the output from the ansible playbook run and determines
        which hosts failed / were unreachable / succeeded. The results are
        put in (Class).ansible_results, keyed by category name.
        """
        # The json_end_idx reference below is important. The playbook runs
        # with JSON output, but the file we're opening here is a mix of free
        # text and JSON, formatted like this:
        #   <optional> free text
        #   Giant glob of JSON
        #   <optional> free text
        # json_end_idx marks the end of the JSON; without it, parsing fails.
        dt = datetime.datetime.now()
        with open(jout_file, 'r') as f:
            all_output = f.readlines()
        if len(all_output) > 1:
            json_start_idx = all_output.index('{\n')
            # The unindented closing '}\n' line sorts above every
            # alphanumeric line, so max() by line content yields the index
            # of the JSON's final brace.
            json_end_idx, _ = max(enumerate(all_output), key=operator.itemgetter(1))
        else:
            if len(all_output) == 1:
                cls.log.error("ansible output:")
                cls.log.error(all_output[0])
            else:
                cls.log.error("ansible produced no output")
            raise Exception('Failed to parse ansible output')
        j = json.loads(''.join(all_output[json_start_idx:json_end_idx + 1]))['stats']
        unreachable = []
        failed = []
        succeeded = []
        if 'localhost' in j.keys():
            del j['localhost']
        for h in j.keys():
            if j[h]['unreachable'] != 0:
                unreachable.append(h)
            elif j[h]['failures'] != 0:
                failed.append(h)
            else:
                succeeded.append(h)
        # We ran into issues where the etcd_prescale_down category key does
        # not exist in the dict.
        if category not in cls.nodes_to_add.keys():
            cls.nodes_to_add[category] = []
        # Pruning down to the requested category only.
        cat_results = {
            'succeeded': [x for x in succeeded if x in cls.nodes_to_add[category]],
            'failed': [x for x in failed if x in cls.nodes_to_add[category]],
            'unreachable': [x for x in unreachable if x in cls.nodes_to_add[category]]
        }
        cls.ansible_results[category] = cat_results
        cls.log.info("- [{}] playbook run results: {}".format(category, cat_results))
        final_logfile = "/var/log/aws-quickstart-openshift-scaling.{}-{}-{}-{}T{}{}{}".format(
            category, dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
        os.rename(jout_file, final_logfile)
        cls.log.info("The json output logfile has been moved to %s" % final_logfile)

    @classmethod
    def summarize_playbook_results(cls):
        cls.log.debug("ansible_results: %s" % cls.ansible_results)
        for cat in cls.ansible_results.keys():
            cls.log.debug("running %s to see whether inventory must be updated" % cat)
            if not cat.startswith("pre_"):
                additional_add = []
                cjson = cls.ansible_results[cat]
                cls.log.debug("cjson: %s" % cjson)
                cls.log.info("Category: {}, Results: {} / {} / {}, ({} / {} / {})".format(
                    cat, len(cjson['succeeded']), len(cjson['failed']),
                    len(cjson['unreachable']), 'Succeeded', 'Failed', 'Unreachable'))
                if cat == 'masters':
                    additional_add = ['nodes']
                cls.log.debug("running cls.migrate_nodes_between_section(%s, %s, %s)"
                              % (cjson['succeeded'], cat, additional_add))
                cls.migrate_nodes_between_section(cjson['succeeded'], cat,
                                                  additional_add=additional_add)
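
# --- Pipeline sketch (order of operations assumed from the methods above) ---
# A typical scale-up pass would look roughly like this; the JSON output path
# and the 'nodes' category are illustrative, not confirmed by this fragment.
InventoryConfig.setup()
InventoryConfig.populate_from_ansible_inventory()

InventoryScaling.wait_for_api()        # block until EC2 reports the new instances
InventoryScaling.process_pipeline()    # fold launches/terminations into the config
InventoryConfig.write_ansible_inventory_file()

# ... run the scaleup wrapper playbook with its JSON stdout saved to a file ...

InventoryScaling.process_playbook_json_output('/tmp/scaleup_output.json', 'nodes')
InventoryScaling.summarize_playbook_results()  # migrates successes out of new_*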
class InventoryConfig(object):
    """
    Class to hold all of the configuration-related objects / methods.

    Methods:
    - setup: Initial class setup.
    - populate_from_ansible_inventory: Populates the known_instances dict
      with data from the ansible inventory.
    - _determine_region_name: Determines the region that the cluster is in.
    """
    log = LogUtil.get_root_logger()
    initial_inventory = False
    scale = False
    id_to_ip_map = dict()
    ansible_host_cfg = dict()
    all_instances = dict()
    known_instances = dict()
    ansible_inventory_file = '/etc/ansible/hosts'
    ansible_playbook_wrapper = "/usr/share/ansible/openshift-ansible/scaleup_wrapper.yml"
    playbooks = dict()
    playbook_directory = "/usr/share/ansible/openshift-ansible/"
    pre_scaleup_playbook = "{}{}".format(playbook_directory, "pre_scaleup.yml")
    pre_scaledown_playbook = "{}{}".format(playbook_directory, "pre_scaledown.yml")
    post_scaleup_playbook = "{}{}".format(playbook_directory, "post_scaleup.yml")
    post_scaledown_playbook = "{}{}".format(playbook_directory, "post_scaledown.yml")
    inventory_categories = {
        "master": ["masters", "new_masters"],
        "etcd": ["etcd", "new_etcd"],
        "node": ["nodes", "new_nodes"],
        "glusterfs": ["glusterfs", "new_glusterfs"],
        "provision": ["provision_in_progress"]
    }
    inventory_node_skel = {
        "master": [],
        "etcd": [],
        "node": [],
        "glusterfs": [],
        "provision": []
    }
    asg_node_skel = {
        "masters": [],
        "etcd": [],
        "nodes": [],
        "glusterfs": [],
        "provision": []
    }
    ansible_full_cfg = {}
    provisioning_hostdefs = {}
    inventory_nodes = copy.deepcopy(inventory_node_skel)
    inventory_nodes['ids'] = {}
    logical_names = {
        "OpenShiftEtcdASG": "etcd",
        "OpenShiftMasterASG": "masters",
        "OpenShiftNodeASG": "nodes",
        "OpenShiftGlusterASG": "glusterfs"
    }
    stack_id = None
    ec2 = None
    region_name = None
    instance_id = None
    ip_to_id_map = None

    @classmethod
    def setup(cls):
        """
        Sets up the class variables initially (populate from inventory, etc.).
        """
        cls.log.info("Setting up the InventoryConfig Class")
        if not cls.initial_inventory:
            cls.load_ansible_inventory_file()
        cls.region_name = cls._determine_region_name()
        cls.instance_id = cls._determine_local_instance_id()
        cls.ec2 = boto3.client('ec2', cls.region_name)
        for tag in cls._grab_local_tags():
            cls.log.debug("Applying: [{}] / Value [{}] - as an attribute on the class."
                          .format(tag['key'], tag['value']))
            setattr(cls, tag['key'], tag['value'])
        for instance in cls._grab_all_instances():
            iid = instance['InstanceId']
            cls.all_instances[iid] = instance
        cls.log.debug("The EC2 API told me about these instances: {}".format(
            cls.all_instances.keys()))
        cls.log.info("InventoryConfig setup complete!")

    @classmethod
    def load_ansible_inventory_file(cls):
        cls.log.info("Loading ansible inventory file from on-disk...")
        with open(cls.ansible_inventory_file, 'r') as f:
            unparsed_document = f.read()
        parsed_document = yaml.load(unparsed_document)
        cls.ansible_full_cfg = parsed_document
        for (k, v) in parsed_document['OSEv3']['children'].iteritems():
            if len(v) == 0:
                continue
            cls.ansible_host_cfg[k] = v['hosts']
        cls.log.info("...Complete")

    @classmethod
    def write_ansible_inventory_file(cls, init=False):
        if not init:
            transformed_host_cfg = {
                k: {'hosts': v}
                for (k, v) in cls.ansible_host_cfg.iteritems()
            }
            cls.ansible_full_cfg['OSEv3']['children'].update(transformed_host_cfg)
        with open(cls.ansible_inventory_file, 'w') as f:
            f.write(yaml.dump(cls.ansible_full_cfg, default_flow_style=False))

    @classmethod
    def verify_required_sections_exist(cls, generate=False):
        """
        Verifies that the required sections exist within the Inventory.
        Ex: new_(masters|nodes|etcd), provision_in_progress
        """
        save_needed = False
        sections = [y for x in cls.inventory_categories.itervalues() for y in x]
        cls.log.info("I'm now verifying that all required sections are present in our runtime config...")
        if generate:
            cls.log.info("Accounting for initial inventory generation")
            compare_dict = cls.ansible_full_cfg['OSEv3']['children']
        else:
            compare_dict = cls.ansible_host_cfg
        for section in sections:
            if section not in compare_dict.keys():
                save_needed = True
                compare_dict[section] = {}
                cls.log.info("The section [{}] was not present in the Ansible Inventory. I'll add it..."
                             .format(section))
        cls.log.info("...Complete.")
        if save_needed:
            if generate:
                cls.ansible_full_cfg['OSEv3']['children'] = compare_dict
            else:
                cls.ansible_host_cfg = compare_dict

    @classmethod
    def populate_from_ansible_inventory(cls):
        """
        Populates the InventoryConfig class with data from the existing
        ansible inventory.
        """
        cls.log.info("We're populating the runtime config from data within the Ansible Inventory")
        ac = cls.ansible_host_cfg
        ic = cls.inventory_categories
        for category, subcategory in ic.iteritems():
            cls.log.debug("Category: {}".format(category))
            if category == 'provision':
                continue
            for sc in subcategory:
                if sc != category:
                    cls.log.debug("\tSubcategory: [{}]/{}".format(category, sc))
                if len(ac[sc]) == 0:
                    cls.log.debug("\t No hosts within this subcategory. Moving on.")
                    continue
                for x, y in ac[sc].iteritems():
                    ip = x
                    try:
                        instance_id = y['instance_id']
                    except KeyError:
                        cls.log.info("Not able to associate an Instance ID with the Private DNS Entry: {}."
                                     .format(ip))
                        continue
                    cls.inventory_nodes[category].append(x)
                    cls.id_to_ip_map[instance_id] = x
                    cls.log.debug("I just added {} to the {} category".format(ip, category))
                    cls.known_instances[instance_id] = ip
                    cls.log.debug("The Instance ID {} has been tied to the Private DNS Entry: {}"
                                  .format(instance_id, ip))

    @classmethod
    def _determine_region_name(cls):
        """
        Queries the metadata service to determine the current Availability
        Zone, then derives the region by dropping the AZ's trailing letter.
        """
        resp = requests.get('http://169.254.169.254/latest/meta-data/placement/availability-zone')
        return resp.text[:-1]

    @classmethod
    def _determine_local_instance_id(cls):
        """
        Queries the metadata service to determine the local instance ID.
        """
        resp = requests.get('http://169.254.169.254/latest/meta-data/instance-id')
        return resp.text

    @classmethod
    def _grab_all_instances(cls):
        """
        Generator around an ec2.describe_instances() call. Uses a filter to
        narrow down the results to this CloudFormation stack.
        """
        filters = [{
            "Name": "tag:aws:cloudformation:stack-id",
            "Values": [InventoryConfig.stack_id]
        }]
        all_instances = cls.ec2.describe_instances(Filters=filters)['Reservations']
        for reservation in all_instances:
            for instance in reservation['Instances']:
                yield instance

    @classmethod
    def _grab_local_tags(cls):
        """
        Grabs the CloudFormation-set tags on the local instance. Depends on
        the result of _determine_local_instance_id().
        """
        ec2 = boto3.resource('ec2', cls.region_name)
        local_instance = ec2.Instance(cls.instance_id)
        for tag in local_instance.tags:
            if 'cloudformation' in tag['Key']:
                _k = tag['Key'].split(':')[2]
                yield {'key': _k.replace('-', '_'), 'value': tag['Value']}
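
# --- Data-shape sketch (hand-written example) ---
# The parsed YAML inventory these load/write methods operate on is assumed
# to look like this; hostnames and instance IDs are made up.
example_inventory = {
    'OSEv3': {
        'children': {
            'masters': {
                'hosts': {
                    'ip-10-0-1-10.ec2.internal': {'instance_id': 'i-0abc123def456789a'},
                },
            },
            'new_masters': {'hosts': {}},
            'provision_in_progress': {'hosts': {}},
        },
    },
}
# load_ansible_inventory_file() copies each non-empty child's 'hosts' dict
# into ansible_host_cfg; populate_from_ansible_inventory() then reads each
# host's 'instance_id' var to fill known_instances and id_to_ip_map.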
class InventoryConfig(object):
    """
    Class to hold all of the configuration-related objects / methods.

    Methods:
    - setup: Initial class setup.
    - populate_from_ansible_inventory: Populates the known_instances dict
      with data from the ansible inventory.
    - _determine_region_name: Determines the region that the cluster is in.
    """
    log = LogUtil.get_root_logger()
    region_name = None
    instance_id = None
    scale = False
    all_instances = {}
    known_instances = {}
    known_instances_iplist = []
    _instance_pattern = 'i-[0-9a-z]+'
    generate_initial_inventory = False
    inventory_file = '/etc/ansible/hosts'
    ansible_playbook_wrapper = "/usr/share/ansible/openshift-ansible/scaleup_wrapper.yml"
    inventory_categories = {
        "master": ["masters", "new_masters"],
        "etcd": ["etcd", "new_etcd"],
        "node": ["nodes", "new_nodes"],
        "provision": ["provision_in_progress"]
    }
    _inventory_node_skel = {
        "master": [],
        "etcd": [],
        "node": [],
        "provision": []
    }
    _asg_node_skel = {"masters": [], "etcd": [], "nodes": [], "provision": []}
    provisioning_hostdefs = {}
    inventory_nodes = copy.deepcopy(_inventory_node_skel)
    inventory_nodes['ids'] = {}
    logical_names = {
        "OpenShiftEtcdASG": "etcd",
        "OpenShiftMasterASG": "masters",
        "OpenShiftNodeASG": "nodes"
    }

    @classmethod
    def setup(cls):
        """
        Sets up the class variables initially (populate from inventory, etc.).
        """
        cls.log.info("Setting up the InventoryConfig Class")
        cls.region_name = cls._determine_region_name()
        cls.instance_id = cls._determine_local_instance_id()
        cls.ec2 = boto3.client('ec2', cls.region_name)
        for tag in cls._grab_local_tags():
            cls.log.debug("Applying: [{}] / Value [{}] - as an attribute on the class."
                          .format(tag['key'], tag['value']))
            setattr(cls, tag['key'], tag['value'])
        for instance in cls._grab_all_instances():
            iid = instance['InstanceId']
            cls.all_instances[iid] = instance
        cls.log.debug("The EC2 API told me about these instances: {}".format(
            cls.all_instances.keys()))
        cls.log.info("InventoryConfig setup complete!")

    @classmethod
    def verify_required_sections_exist(cls):
        """
        Verifies that the required sections exist within the Inventory.
        Ex: new_(masters|nodes|etcd)
        """
        inventory_config = InventoryConfig.c
        a = cls.inventory_categories
        for x in a:
            for y in a[x]:
                if not inventory_config.has_section(y):
                    inventory_config.add_section(y)
                    if 'provision' not in y:
                        inventory_config.set('OSEv3:children', y)

    @classmethod
    def populate_from_ansible_inventory(cls):
        """
        Populates the InventoryConfig class with data from the existing
        ansible inventory.
        """
        cls.log.info("We're populating the runtime config from data within the Ansible Inventory")
        inventory_config = InventoryConfig.c
        for category in cls.inventory_categories.keys():
            cls.log.debug("Category: {}".format(category))
            if category == 'provision':
                continue
            for subcategory in cls.inventory_categories[category]:
                cls.log.debug("\tSubcategory: {}".format(subcategory))
                for key in inventory_config.options(subcategory):
                    ip = key.split()[0]
                    cls.inventory_nodes[category].append(ip)
                    cls.log.debug("I just added {} to the {} category".format(ip, category))
                    _pattern = re.compile(cls._instance_pattern)
                    _host_kv = inventory_config.get(subcategory, key)
                    if _host_kv is None:
                        _search_string = key
                    else:
                        _search_string = _host_kv
                    # Check the match object before calling .group(); a bare
                    # .group() call raises AttributeError when nothing matches.
                    _match = _pattern.search(_search_string)
                    if _match:
                        _instance_id = _match.group()
                        cls.known_instances[_instance_id] = ip
                        cls.known_instances_iplist.append(ip)
                        cls.log.debug("The Instance ID {} has been tied to the Private DNS Entry: {}"
                                      .format(_instance_id, ip))
                    else:
                        cls.log.debug("No instance ID was found!")

    @classmethod
    def _determine_region_name(cls):
        """
        Queries the metadata service to determine the current Availability
        Zone, then derives the region by dropping the AZ's trailing letter.
        """
        resp = requests.get('http://169.254.169.254/latest/meta-data/placement/availability-zone')
        return resp.text[:-1]

    @classmethod
    def _determine_local_instance_id(cls):
        """
        Queries the metadata service to determine the local instance ID.
        """
        resp = requests.get('http://169.254.169.254/latest/meta-data/instance-id')
        return resp.text

    @classmethod
    def _grab_all_instances(cls):
        """
        Generator around an ec2.describe_instances() call. Uses a filter to
        narrow down the results to this CloudFormation stack.
        """
        filters = [{
            "Name": "tag:aws:cloudformation:stack-id",
            "Values": [InventoryConfig.stack_id]
        }]
        all_instances = cls.ec2.describe_instances(Filters=filters)['Reservations']
        for reservation in all_instances:
            for instance in reservation['Instances']:
                yield instance

    @classmethod
    def _grab_local_tags(cls):
        """
        Grabs the CloudFormation-set tags on the local instance. Depends on
        the result of _determine_local_instance_id().
        """
        ec2 = boto3.resource('ec2', cls.region_name)
        local_instance = ec2.Instance(cls.instance_id)
        for tag in local_instance.tags:
            if 'cloudformation' in tag['Key']:
                _k = tag['Key'].split(':')[2]
                yield {'key': _k.replace('-', '_'), 'value': tag['Value']}
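
# --- Regex sketch (assumed host-line format) ---
# In the INI-style inventory this older class parses, an instance ID is
# expected to appear somewhere on the host's line; _instance_pattern pulls
# it out. The host line below is illustrative.
import re

host_line = 'ip-10-0-1-10.ec2.internal # i-0abc123def456789a'
match = re.compile('i-[0-9a-z]+').search(host_line)
if match:
    # -> ip-10-0-1-10.ec2.internal i-0abc123def456789a
    print(host_line.split()[0], match.group())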
class InventoryScaling(object):
    """
    Class to facilitate scaling activities in the Cluster's Auto Scaling Groups.
    """
    log = LogUtil.get_root_logger()
    _incoming_instances = copy.deepcopy(InventoryConfig._inventory_node_skel)
    nodes_to_add = copy.deepcopy(InventoryConfig._asg_node_skel)
    nodes_to_remove = copy.deepcopy(InventoryConfig._asg_node_skel)
    nodes_to_add['combined'] = []
    nodes_to_remove['combined'] = []
    ansible_results = {}

    @classmethod
    def wait_for_api(cls, instance_id_list=None):
        """
        Waits for instances in (class).nodes_to_add to show up in
        DescribeInstances API calls, then adds them to the
        InventoryConfig.all_instances dictionary. This is necessary so the
        instances can be written to the Inventory config file.
        """
        if not instance_id_list:
            instance_id_list = cls.nodes_to_add['combined']
        cls.log.info("[wait_for_api]: Waiting for the EC2 API to return new instances.")
        cls._client = boto3.client('ec2', InventoryConfig.region_name)
        waiter = cls._client.get_waiter('instance_exists')
        waiter.wait(InstanceIds=instance_id_list)
        for instance in cls._fetch_newly_launched_instances_from_api(cls.nodes_to_add['combined']):
            cls.log.debug("[{}] has been detected in the API.".format(instance))
            InventoryConfig.all_instances[instance['InstanceId']] = instance
        cls.log.info("[wait_for_api] Complete")

    @classmethod
    def _fetch_newly_launched_instances_from_api(cls, instance_id_list):
        """
        Generator. Fetches the newly-launched instances from the API.
        """
        all_instances = cls._client.describe_instances(
            InstanceIds=instance_id_list)['Reservations']
        for reservation in all_instances:
            for instance in reservation['Instances']:
                yield instance

    @classmethod
    def process_pipeline(cls):
        """
        ClassMethod that:
        - prunes the config, removing nodes that are terminating.
        - adds nodes to the config that just launched.
        """
        cls.log.info("We're processing the scaling pipeline")
        # Remove the nodes (from config) that are terminating.
        if cls.nodes_to_remove['combined']:
            cls.log.info("We have the following nodes to remove from the inventory:")
            cls.log.info("{}".format(cls.nodes_to_remove['combined']))
            for category in cls.nodes_to_remove.keys():
                if category == 'combined':
                    continue
                cls.remove_node_from_section(cls.nodes_to_remove[category], category)
        else:
            cls.log.info("No nodes were found to remove from the inventory.")
        # Add the nodes that are launching.
        if cls.nodes_to_add['combined']:
            cls.log.info("We have the following nodes to add to the inventory:")
            cls.log.info("{}".format(
                [x.split()[0] for x in cls.nodes_to_add['combined']]))
            for category in cls.nodes_to_add.keys():
                if category == 'combined':
                    continue
                cls._incoming_instances[category] = [
                    x.split()[0] for x in cls.nodes_to_add[category]
                ]
                cls.log.debug("Adding nodes {} to the {} category".format(
                    cls._incoming_instances[category], category))
                cls.add_nodes_to_section(cls.nodes_to_add[category], category)
            cls.log.info("Complete!")
        else:
            cls.log.info("No nodes were found to add to the inventory.")

    @classmethod
    def add_nodes_to_section(cls, node_list, category, fluff=True, migrate=False):
        """
        Adds a node (private IP) to a config section.
        """
        c = InventoryConfig.c
        if not migrate:
            _provisioning_section = InventoryConfig.inventory_categories['provision'][0]
            if not c.has_section(_provisioning_section):
                c.set('OSEv3:children', _provisioning_section)
            if not c.has_section('new_' + category):
                if category == 'provision':
                    pass
                else:
                    c.set('OSEv3:children', 'new_' + category)
            for n in node_list:
                ip = n.split()[0]
                if ip in InventoryConfig.known_instances_iplist:
                    continue
                if fluff:
                    _new_node_section = 'new_' + category
                else:
                    _new_node_section = category
                if not c.has_option(_provisioning_section, ip):
                    c.set(_provisioning_section, ip)
                if not c.has_option(_new_node_section, n):
                    c.set(_new_node_section, n)
        else:
            for n in node_list:
                c.set(category, n)

    @classmethod
    def remove_node_from_section(cls, node, category, migrate=False):
        """
        ClassMethod to remove a list of nodes from a list of categories
        within the config file.
        """
        c = InventoryConfig.c
        migration_removed = []
        categories = [category, '{}_{}'.format('new', category)]
        if migrate:
            # Leaving only new_{category}
            del categories[categories.index(category)]
            categories += InventoryConfig.inventory_categories['provision']
        for cat in categories:
            for n in node:
                if 'provision' in cat:
                    c.remove_option(cat, n)
                    continue
                for idx in c.options(cat):
                    if idx.split()[0] == n:
                        if migrate:
                            full_idx = c.get(cat, idx)
                            if full_idx is None:
                                full_idx = idx
                            migration_removed.append(full_idx)
                        c.remove_option(cat, idx)
                        continue
        if migrate:
            return migration_removed

    @classmethod
    def migrate_nodes_between_section(cls, node, category):
        """
        Wrapper to migrate successful nodes from the new_{category} label to
        the {category} label within the Ansible inventory. Additionally
        removes the nodes from the provisioning category.
        """
        addlist = cls.remove_node_from_section(node, category, migrate=True)
        if 'master' in category:
            _ = cls.remove_node_from_section(node, 'nodes', migrate=True)
        cls.add_nodes_to_section(addlist, category, migrate=True)
        cls.log.info("Nodes: {} have been permanently added to the Inventory under the {} category"
                     .format(node, category))
        cls.log.info("They've additionally been removed from the provision_in_progress category")

    @classmethod
    def process_playbook_json_output(cls, jout_file, category):
        """
        Processes the output from the ansible playbook run and determines
        which hosts failed / were unreachable / succeeded. The results are
        put in (Class).ansible_results, keyed by category name.
        """
        # The json_end_idx reference below is important. The playbook runs
        # with JSON output, but the file we're opening here is a mix of free
        # text and JSON, formatted like this:
        #   <optional> free text
        #   Giant glob of JSON
        #   <optional> free text
        # json_end_idx marks the end of the JSON; without it, parsing fails.
        dt = datetime.datetime.now()
        with open(jout_file, 'r') as f:
            all_output = f.readlines()
        if len(all_output) > 1:
            json_start_idx = all_output.index('{\n')
            json_end_idx, _ = max(enumerate(all_output), key=operator.itemgetter(1))
        else:
            # Nothing to slice apart; bail out here rather than hit a
            # NameError on json_start_idx below.
            raise Exception('Failed to parse ansible output')
        j = json.loads(''.join(all_output[json_start_idx:json_end_idx + 1]))['stats']
        unreachable = []
        failed = []
        succeeded = []
        if 'localhost' in j.keys():
            del j['localhost']
        for h in j.keys():
            if j[h]['unreachable'] != 0:
                unreachable.append(h)
            elif j[h]['failures'] != 0:
                failed.append(h)
            else:
                succeeded.append(h)
        # Pruning down to the requested category only.
        cat_results = {
            'succeeded': [x for x in succeeded if x in cls._incoming_instances[category]],
            'failed': [x for x in failed if x in cls._incoming_instances[category]],
            'unreachable': [x for x in unreachable if x in cls._incoming_instances[category]]
        }
        cls.ansible_results[category] = cat_results
        cls.log.info("- [{}] playbook run results: {}".format(category, cat_results))
        final_logfile = "/var/log/aws-quickstart-openshift-scaling.{}-{}-{}T{}{}".format(
            dt.year, dt.month, dt.day, dt.hour, dt.minute)
        os.rename(jout_file, final_logfile)
        cls.log.info("The json output logfile has been moved to %s" % final_logfile)
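
# --- Worked example of the free-text + JSON slicing above (made-up output) ---
import json
import operator

lines = [
    'PLAY RECAP *********\n',  # free text before the JSON glob
    '{\n',                     # .index('{\n') -> json_start_idx
    '    "stats": {\n',
    '        "localhost": {"unreachable": 0, "failures": 0}\n',
    '    }\n',
    '}\n',                     # max() lands here: '}' sorts above letters
    'Playbook run took 0 days\n',
]
start = lines.index('{\n')
end, _ = max(enumerate(lines), key=operator.itemgetter(1))
stats = json.loads(''.join(lines[start:end + 1]))['stats']
print(stats)  # {'localhost': {'unreachable': 0, 'failures': 0}}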