Example 1
 def __init__(self, json_doc, required_stack_id=None):
     self.log = LogUtil.get_root_logger()
     self._instances = {'list':[], "scaling":[]}
     self._asg = boto3.client('autoscaling', InventoryConfig.region_name)
     self.name = json_doc['AutoScalingGroupName']
     self.private_ips = []
     self.node_hostdefs = []
     self.scale_override = True
     self.scaling_events = []
     self.scale_in_progress_instances = {'terminate':[], 'launch':[]}
     self.cooldown = json_doc['DefaultCooldown']
     self.logical_name = None
     self._cooldown_upperlimit = self.cooldown * 3
     if self._cooldown_upperlimit >= 300:
         self._cooldown_upperlimit = 300
     self.elb_name = None
     self.stack_id = None
     for tag in self._grab_tags(json_doc['Tags']):
         self.__dict__[tag['key']] = tag['value']
     self.in_openshift_cluster = self._determine_cluster_membership()
     if self.in_openshift_cluster:
         # Set the logical_name
         self.logical_name = InventoryConfig.logical_names[self.logical_id]
         # Sanity check to verify they're in the API.
         # - and populate the InventoryConfig.all_instances dict as a result.
         # - working around edge cases.
         ilist = [i['InstanceId'] for i in json_doc['Instances']]
         InventoryScaling.wait_for_api(instance_id_list=ilist)
         # Grab instances
         for instance in self._grab_instance_metadata(json_doc['Instances']):
             self._instances[instance.InstanceId] = instance
             self._instances['list'].append(instance.InstanceId)
             self.private_ips += instance.private_ips
         # Grab scaling events. Anything newer than (self.cooldown * 3).
         # However, only do so if we're not populating the initial inventory.
         if not InventoryConfig.generate_initial_inventory:
             for scaling_event in self._grab_current_scaling_events():
                 self.scaling_events.append(scaling_event)
                 # Skip launch events for instances already in the config; this
                 # compensates for the self._cooldown_upperlimit window.
                 if (scaling_event.type == 'launch') and (scaling_event.instance in InventoryConfig.known_instances.keys()):
                     continue
                 if (scaling_event.type == 'launch') and (scaling_event.instance in self.scale_in_progress_instances['terminate']):
                     continue
                 self.scale_in_progress_instances[scaling_event.type].append(scaling_event.instance)
                 self._instances['scaling'].append(scaling_event.instance)
             for instance in self._instances['list']:
                 if (instance not in InventoryConfig.known_instances.keys()) and (instance not in self._instances['scaling']):
                     self.scale_in_progress_instances['launch'].append(instance)
                     self.scale_override = True
         self.openshift_config_category = self._determine_openshift_category(self.logical_id)
         # Grab Inventory host definitions
         for combined_hostdef in self.generate_asg_node_hostdefs():
             instance_id, hostdef = combined_hostdef
             self.node_hostdefs.append(hostdef)
             InventoryConfig.provisioning_hostdefs[instance_id] = hostdef
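
For context, this constructor consumes a single auto scaling group document from a DescribeAutoScalingGroups call. A minimal driver sketch, assuming the enclosing class is named ClusterGroup (the excerpt does not show the class name) and that pagination is not needed:

import boto3

def build_cluster_groups(region_name):
    """Illustrative only: wrap each ASG document in the constructor above."""
    asg = boto3.client('autoscaling', region_name)
    # Each entry carries the keys the constructor reads:
    # AutoScalingGroupName, DefaultCooldown, Tags, and Instances.
    response = asg.describe_auto_scaling_groups()
    return [ClusterGroup(doc) for doc in response['AutoScalingGroups']]  # hypothetical class name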
#!/usr/bin/env python
import argparse
import os
import subprocess
import tempfile
import shlex
import time
import sys
from aws_openshift_quickstart.utils import *
from aws_openshift_quickstart.logger import LogUtil
LogUtil.set_log_handler('/var/log/openshift-quickstart-scaling.log')
log = LogUtil.get_root_logger()

def generate_inital_inventory_nodes(write_hosts_to_temp=False):
    """
    Generates the initial ansible inventory. Instances only.
    """

    # TODO: Add debugging statements
    def _varsplit(filename):
        if not os.path.exists(filename):
            return {}
        if os.path.getsize(filename) == 1:
            return {}
        _vs = {}
        with open(filename, 'r') as f:
            varlines = f.readlines()
        for l in varlines:
            try:
                l_stripped = l.strip('\n')
                if l_stripped == '':
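
The fragment above is cut off mid-loop. A hedged sketch of how a key=value splitter like _varsplit plausibly finishes; the continuation is an assumption, not recovered source:

import os

def _varsplit_sketch(filename):
    """Illustrative stand-in for the truncated _varsplit above."""
    if not os.path.exists(filename):
        return {}
    _vs = {}
    with open(filename, 'r') as f:
        for line in f.readlines():
            line = line.strip('\n')
            # Assumed behavior: skip blanks and split on the first '='.
            if line == '' or '=' not in line:
                continue
            key, value = line.split('=', 1)
            _vs[key] = value
    return _vs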
Example 3
 def __init__(self, json_doc, version='3.9'):
     self.log = LogUtil.get_root_logger()
     self._instances = {'list': [], "scaling": []}
     self._asg = boto3.client('autoscaling', InventoryConfig.region_name)
     self.name = json_doc['AutoScalingGroupName']
     self.private_ips = list()
     self.scaling_events = list()
     self.node_hostdefs = dict()
     self.scale_in_progress_instances = {'terminate': [], 'launch': []}
     self.cooldown = json_doc['DefaultCooldown']
     self._cooldown_upperlimit = self.cooldown * 3
     self.scale_override = False
     self.logical_name = None
     self.elb_name = None
     self.stack_id = None
     self.logical_id = None
     if self._cooldown_upperlimit <= 300:
         self._cooldown_upperlimit = 300
     for tag in self._grab_tags(json_doc['Tags']):
         self.__dict__[tag['key']] = tag['value']
     self.in_openshift_cluster = self._determine_cluster_membership()
     if self.in_openshift_cluster:
         self.openshift_config_category = self._determine_openshift_category(
             self.logical_id)
         # Set the logical_name
         self.logical_name = InventoryConfig.logical_names[self.logical_id]
         # Sanity check to verify they're in the API.
         # - and populate the InventoryConfig.all_instances dict as a result.
         # - working around edge cases.
         ilist = [i['InstanceId'] for i in json_doc['Instances']]
         InventoryScaling.wait_for_api(instance_id_list=ilist)
         # Grab instances
         for instance in self._grab_instance_metadata(
                 json_doc['Instances']):
             self._instances[instance.InstanceId] = instance
             self._instances['list'].append(instance.InstanceId)
             self.private_ips += instance.private_ips
         # Grab scaling events. Anything newer than (self.cooldown * 3).
         # However, only do so if we're not populating the initial inventory.
         if not InventoryConfig.initial_inventory:
             for scaling_event in self._grab_current_scaling_events():
                 self.scaling_events.append(scaling_event)
                 # Skip launch events for instances already in the config; this
                 # compensates for the self._cooldown_upperlimit window.
                 if (scaling_event.event_type == 'launch') and (
                         scaling_event.instance
                         in InventoryConfig.known_instances.keys()):
                     continue
                 if (scaling_event.event_type == 'launch') and (
                         scaling_event.instance
                         in self.scale_in_progress_instances['terminate']):
                     continue
                 self.scale_in_progress_instances[
                     scaling_event.event_type].append(
                         scaling_event.instance)
                 self._instances['scaling'].append(scaling_event.instance)
             for instance in self._instances['list']:
                 # Sanity check.
                 # - If the instance is neither in the known_instances list nor defined in a recent scaling event,
                 #   but is in the ASG (we don't know about it otherwise)
                 # -- Add it to the scale_in_progress list, and set scale_override to True, so a scale-up occurs.
                 #    (See: scaler.scale_
                 if (instance not in InventoryConfig.known_instances.keys()
                     ) and (instance not in self._instances['scaling']):
                     self.scale_in_progress_instances['launch'].append(
                         instance)
                     self.scale_override = True
         # Grab Inventory host definitions
         for combined_hostdef in self.generate_asg_node_hostdefs(version):
             instance_id, hostdef = combined_hostdef
             InventoryConfig.id_to_ip_map[instance_id] = hostdef[
                 'ip_or_dns']
             del hostdef['ip_or_dns']
             InventoryConfig.provisioning_hostdefs[instance_id] = hostdef
             self.node_hostdefs.update(hostdef)
Example 4
class InventoryScaling(object):
    """
    Class to facilitate scaling activities in the Cluster's Auto Scaling Groups.
    """
    log = LogUtil.get_root_logger()
    nodes_to_add = copy.deepcopy(InventoryConfig.asg_node_skel)
    nodes_to_remove = copy.deepcopy(InventoryConfig.asg_node_skel)

    nodes_to_add['combined'] = []
    nodes_to_remove['combined'] = []
    ansible_results = {}
    _client = None

    @classmethod
    def wait_for_api(cls, instance_id_list=None):
        """
        Wait for instances in (class).nodes_to_add to show up in DescribeInstances API Calls. From there,
        we add them to the InventoryConfig.all_instances dictionary. This is necessary to allow the
        instances to be written to the Inventory config file.
        """
        if not instance_id_list:
            instance_id_list = cls.nodes_to_add['combined']

        cls.log.info(
            "[wait_for_api]: Waiting for the EC2 API to return new instances.")
        cls._client = boto3.client('ec2', InventoryConfig.region_name)
        waiter = cls._client.get_waiter('instance_exists')
        waiter.wait(InstanceIds=instance_id_list)

        for instance in cls._fetch_newly_launched_instances_from_api(
                cls.nodes_to_add['combined']):
            cls.log.debug(
                "[{}] has been detected in the API.".format(instance))
            InventoryConfig.all_instances[instance['InstanceId']] = instance
        cls.log.info("[wait_for_api] Complete")

    @classmethod
    def _fetch_newly_launched_instances_from_api(cls, instance_id_list):
        """
        Generator.
        Fetches the newly-launched instances from the API.
        """
        filters = [{'Name': 'instance-id', 'Values': instance_id_list}]
        all_instances = cls._client.describe_instances(
            Filters=filters)['Reservations']
        i = 0
        while i < len(all_instances):
            j = 0
            while j < len(all_instances[i]['Instances']):
                yield all_instances[i]['Instances'][j]
                j += 1
            i += 1
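
    # The index-based while loops above walk the Reservations -> Instances
    # nesting that describe_instances returns. An equivalent form with plain
    # for loops, shown for comparison only (not part of the original):
    #
    #     for reservation in all_instances:
    #         for instance in reservation['Instances']:
    #             yield instance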

    @classmethod
    def process_pipeline(cls):
        """
        ClassMethod that
            - prunes the config, removing nodes that are terminating.
            - adds nodes to the config that just launched
        """
        cls.log.info("We're processing the scaling pipeline")
        # Remove the nodes (from config) that are terminating.
        if cls.nodes_to_remove['combined']:
            cls.log.info(
                "We have the following nodes to remove from the inventory:")
            cls.log.info("{}".format(cls.nodes_to_remove['combined']))
            for category in cls.nodes_to_remove.keys():
                if category == 'combined':
                    continue
                # cls.nodes_to_remove[category] is a list of instance IDs.
                cls.remove_node_from_section(cls.nodes_to_remove[category],
                                             category)
        else:
            cls.log.info("No nodes were found to remove from the inventory.")

        # Add the nodes that are launching.
        if cls.nodes_to_add['combined']:
            cls.log.info(
                "We have the following nodes to add to the inventory:")
            cls.log.info("{}".format([x
                                      for x in cls.nodes_to_add['combined']]))
            for category in cls.nodes_to_add.keys():
                if category == 'combined':
                    continue
                cls.log.debug("Adding nodes {} to the {} category".format(
                    cls.nodes_to_add[category], category))
                cls.add_nodes_to_section(cls.nodes_to_add[category], category)
            cls.log.info("Complete!")
        else:
            cls.log.info("No nodes were found to add to the inventory.")

    @classmethod
    def add_nodes_to_section(cls, nodes, category, fluff=True, migrate=False):
        """
        Adds a node (private IP / private DNS Entry) to a config section
        """
        acfg = InventoryConfig.ansible_host_cfg
        ic = InventoryConfig
        if not migrate:
            # dict. not list.
            if fluff:
                new_node_section = 'new_' + category
            else:
                new_node_section = category
            prov_sec = ic.inventory_categories['provision'][0]
            # FIXME: account for dict.
            for n in nodes:
                if n in ic.known_instances.keys():
                    continue
                acfg[new_node_section].update(
                    ic.provisioning_hostdefs[ic.ip_to_id_map[n]])
                acfg[prov_sec].update(
                    ic.provisioning_hostdefs[ic.ip_to_id_map[n]])
        else:
            # dict passed by the migrate wrapper.
            acfg[category].update(nodes)

    @classmethod
    def remove_node_from_section(cls,
                                 node,
                                 category,
                                 migrate=False,
                                 use_migration_dict=True):
        """
        ClassMethod to remove a list of nodes from a list of categories within the config file.
        """
        migration_dict = {}
        categories = [category, '{}_{}'.format('new', category)]
        if migrate:
            # Leaving only new_{category}
            del categories[categories.index(category)]
        categories += InventoryConfig.inventory_categories['provision']
        for node_key in node:
            for cat in categories:
                try:
                    cls.log.info("Removing {} from category {}".format(
                        node_key, cat))
                    if migrate and use_migration_dict:
                        migration_dict.update({
                            node_key:
                            InventoryConfig.ansible_host_cfg[cat][node_key]
                        })
                    del InventoryConfig.ansible_host_cfg[cat][node_key]
                except KeyError:
                    cls.log.debug(
                        "{} wasn't present within {} after all.".format(
                            node_key, cat))
        if migrate:
            return migration_dict

    @classmethod
    def migrate_nodes_between_section(cls,
                                      nodes,
                                      category,
                                      additional_add=None):
        """
        Wrapper to migrate successful nodes between new_{category} and {category}
        labels within the Ansible inventory. Additionally removes node from the
        provisioning category.
        """
        if additional_add is None:
            additional_add = []
        cls.log.debug(
            "migrate_nodes_between_section - nodes: %s category: %s additional_add: %s"
            % (nodes, category, additional_add))
        add_dict = cls.remove_node_from_section(nodes, category, migrate=True)
        if 'master' in category:
            _ = cls.remove_node_from_section(nodes,
                                             'nodes',
                                             migrate=True,
                                             use_migration_dict=False)
        cls.add_nodes_to_section(add_dict, category, migrate=True)
        for addcat in additional_add:
            cls.add_nodes_to_section(add_dict, addcat, migrate=True)
        cls.log.info(
            "Nodes: {} have been permanately added to the Inventory under the {} category"
            .format(nodes, category))
        cls.log.info(
            "They've additionally been removed from the provision_in_progress category"
        )

    @classmethod
    def process_playbook_json_output(cls, jout_file, category):
        """
        Processes the output from the ansible playbook run and
        determines what hosts failed / were unreachable / succeeded.

        The results are put in (Class).ansible_results, keyed by category name.
        """
        # The json_end_idx reference below is important. The playbook run is in json output,
        # however the text we're opening here is a mix of free-text and json.
        # it's formatted like this.
        #   <optional> free text
        #   Giant Glob of JSON
        #   <optional> free text.
        # The json_end_idx variable in this function defines the end of the json.
        # Without it, JSON parsing will fail.
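        # Illustrative shape of jout_file (values invented, not captured
        # from a real run):
        #   PLAY [new_nodes] ****           <- optional free text
        #   {
        #       "stats": {
        #           "ip-10-0-1-5": {"failures": 0, "unreachable": 0}
        #       }
        #   }
        #   Trailing summary line           <- optional free text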
        dt = datetime.datetime.now()
        with open(jout_file, 'r') as f:
            all_output = f.readlines()
        if len(all_output) > 1:
            json_start_idx = all_output.index('{\n')
            json_end_idx, _ = max(enumerate(all_output),
                                  key=operator.itemgetter(1))
        else:
            if len(all_output) == 1:
                cls.log.error("ansible output:")
                cls.log.error(all_output[0])
            else:
                cls.log.error("ansible produced no output")
            raise Exception('Failed to parse ansible output')

        j = json.loads(''.join(all_output[json_start_idx:json_end_idx +
                                          1]))['stats']
        unreachable = []
        failed = []
        succeeded = []
        if 'localhost' in j.keys():
            del j['localhost']
        for h in j.keys():
            if j[h]['unreachable'] != 0:
                unreachable.append(h)
            elif j[h]['failures'] != 0:
                failed.append(h)
            else:
                succeeded.append(h)
        # ran into issues where etcd_prescale_down category key does not exist in the dict
        if category not in cls.nodes_to_add.keys():
            cls.nodes_to_add[category] = []
        # Pruning down to category only.
        cat_results = {
            'succeeded':
            [x for x in succeeded if x in cls.nodes_to_add[category]],
            'failed': [x for x in failed if x in cls.nodes_to_add[category]],
            'unreachable':
            [x for x in unreachable if x in cls.nodes_to_add[category]]
        }
        cls.ansible_results[category] = cat_results
        cls.log.info("- [{}] playbook run results: {}".format(
            category, cat_results))
        final_logfile = "/var/log/aws-quickstart-openshift-scaling.{}-{}-{}-{}T{}{}{}".format(
            category, dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
        os.rename(jout_file, final_logfile)
        cls.log.info("The json output logfile has been moved to %s" %
                     final_logfile)

    @classmethod
    def summarize_playbook_results(cls):
        cls.log.debug("ansible_results: %s" % cls.ansible_results)
        for cat in cls.ansible_results.keys():
            cls.log.debug(
                "running %s to see whether inventory must be updated" % cat)
            if not cat.startswith("pre_"):
                additional_add = []
                cjson = cls.ansible_results[cat]
                cls.log.debug("cjson: %s" % cjson)
                cls.log.info(
                    "Category: {}, Results: {} / {} / {}, ({} / {} / {})".
                    format(cat, len(cjson['succeeded']), len(cjson['failed']),
                           len(cjson['unreachable']), 'Succeeded', 'Failed',
                           'Unreachable'))
                if cat == 'masters':
                    additional_add = ['nodes']
                cls.log.debug(
                    "running cls.migrate_nodes_between_section(%s, %s, %s)" %
                    (cjson['succeeded'], cat, additional_add))
                cls.migrate_nodes_between_section(
                    cjson['succeeded'], cat, additional_add=additional_add)
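
Taken together, these classmethods form a scaling pipeline. A hedged driver sketch inferred from the methods above; the ansible invocation that produces jout_file is assumed, not shown:

def run_scaling_cycle(jout_file):
    """Illustrative call order only; not part of the original module."""
    InventoryScaling.wait_for_api()      # block until EC2 reports new nodes
    InventoryScaling.process_pipeline()  # prune terminated / add launched nodes
    # ... run the scale-up playbook with JSON output written to jout_file ...
    InventoryScaling.process_playbook_json_output(jout_file, 'nodes')
    InventoryScaling.summarize_playbook_results()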
Example 5
class InventoryConfig(object):
    """
    Class to hold all of the configuration related objects / methods
    Methods:
        - setup: Initial class setup.
        - populate_from_ansible_inventory: Populates the known_instances dict w/data from the ansible inventory.
        - _determine_region_name: Determines the region that the cluster is in.
    """
    log = LogUtil.get_root_logger()
    initial_inventory = False
    scale = False
    id_to_ip_map = dict()
    ansible_host_cfg = dict()
    all_instances = dict()
    known_instances = dict()
    ansible_inventory_file = '/etc/ansible/hosts'
    ansible_playbook_wrapper = "/usr/share/ansible/openshift-ansible/scaleup_wrapper.yml"
    playbooks = dict()
    playbook_directory = "/usr/share/ansible/openshift-ansible/"
    pre_scaleup_playbook = "{}{}".format(playbook_directory, "pre_scaleup.yml")
    pre_scaledown_playbook = "{}{}".format(playbook_directory,
                                           "pre_scaledown.yml")
    post_scaleup_playbook = "{}{}".format(playbook_directory,
                                          "post_scaleup.yml")
    post_scaledown_playbook = "{}{}".format(playbook_directory,
                                            "post_scaledown.yml")
    inventory_categories = {
        "master": ["masters", "new_masters"],
        "etcd": ["etcd", "new_etcd"],
        "node": ["nodes", "new_nodes"],
        "glusterfs": ["glusterfs", "new_glusterfs"],
        "provision": ["provision_in_progress"]
    }
    inventory_node_skel = {
        "master": [],
        "etcd": [],
        "node": [],
        "glusterfs": [],
        "provision": []
    }
    asg_node_skel = {
        "masters": [],
        "etcd": [],
        "nodes": [],
        "glusterfs": [],
        "provision": []
    }
    ansible_full_cfg = {}
    provisioning_hostdefs = {}
    inventory_nodes = copy.deepcopy(inventory_node_skel)
    inventory_nodes['ids'] = {}
    logical_names = {
        "OpenShiftEtcdASG": "etcd",
        "OpenShiftMasterASG": "masters",
        "OpenShiftNodeASG": "nodes",
        "OpenShiftGlusterASG": "glusterfs"
    }
    stack_id = None
    ec2 = None
    region_name = None
    instance_id = None
    ip_to_id_map = None

    @classmethod
    def setup(cls):
        """
        function to setup the variables initially (populate from inventory, etc)
        """
        cls.log.info("Setting up the InventoryConfig Class")
        if not cls.initial_inventory:
            cls.load_ansible_inventory_file()
        cls.region_name = cls._determine_region_name()
        cls.instance_id = cls._determine_local_instance_id()
        cls.ec2 = boto3.client('ec2', cls.region_name)
        for tag in cls._grab_local_tags():
            cls.log.debug(
                "Applying: [{}] / Value [{}] - as a method within the cluster."
                .format(tag['key'], tag['value']))
            setattr(cls, tag['key'], tag['value'])
        for instance in cls._grab_all_instances():
            iid = instance['InstanceId']
            cls.all_instances[iid] = instance
        cls.log.debug("The EC2 API Told me about these instances: {}".format(
            cls.all_instances.keys()))
        cls.log.info("InventoryConfig setup complete!")

    @classmethod
    def load_ansible_inventory_file(cls):
        cls.log.info("Loading ansible inventory file from on-disk...")
        try:
            with open(cls.ansible_inventory_file, 'r') as f:
                unparsed_document = f.read()
            parsed_document = yaml.load(unparsed_document)
        except Exception as e:
            raise e
        cls.ansible_full_cfg = parsed_document
        for (k, v) in parsed_document['OSEv3']['children'].iteritems():
            if len(v) == 0:
                continue
            cls.ansible_host_cfg[k] = v['hosts']
        cls.log.info("...Complete")

    @classmethod
    def write_ansible_inventory_file(cls, init=False):
        if not init:
            transformed_host_cfg = {
                k: {
                    'hosts': v
                }
                for (k, v) in cls.ansible_host_cfg.iteritems()
            }
            cls.ansible_full_cfg['OSEv3']['children'].update(
                transformed_host_cfg)
        with open(cls.ansible_inventory_file, 'w') as f:
            f.write(yaml.dump(cls.ansible_full_cfg, default_flow_style=False))
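
    # For reference, the load/write pair above assumes an inventory shaped
    # roughly like this (values illustrative):
    #
    #   OSEv3:
    #     children:
    #       masters:
    #         hosts:
    #           ip-10-0-1-5.ec2.internal: {instance_id: i-0abc123}
    #       new_masters:
    #         hosts: {}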

    @classmethod
    def verify_required_sections_exist(cls, generate=False):
        """
        Verifies that the required sections exist within the Inventory.
        Ex: new_(masters|nodes|etcd), provision_in_progress
        """
        save_needed = False
        sections = [
            y for x in cls.inventory_categories.itervalues() for y in x
        ]
        cls.log.info(
            "I'm now verifying that all required sections are present in our runtime config..."
        )
        if generate:
            cls.log.info("Accounting for initial inventory generation")
            compare_dict = cls.ansible_full_cfg['OSEv3']['children']
        else:
            compare_dict = cls.ansible_host_cfg
        for section in sections:
            if section not in compare_dict.keys():
                save_needed = True
                compare_dict[section] = {}
                cls.log.info(
                    "The section [{}] was not present in the Ansible Inventory. I'll add it..."
                    .format(section))
        cls.log.info("...Complete.")
        if save_needed:
            if generate:
                cls.ansible_full_cfg['OSEv3']['children'] = compare_dict
            else:
                cls.ansible_host_cfg = compare_dict

    @classmethod
    def populate_from_ansible_inventory(cls):
        """
        Populates the InventoryConfig class with data from the existing ansible inventory
        """
        cls.log.info(
            "We're populating the runtime config from data within the Ansible Inventory"
        )
        ac = cls.ansible_host_cfg
        ic = cls.inventory_categories
        for category, subcategory in ic.iteritems():
            cls.log.debug("Category: {}".format(category))
            if category == 'provision':
                continue
            for sc in subcategory:
                if sc != category:
                    cls.log.debug("\tSubcategory: [{}]/{}".format(
                        category, sc))
                if len(ac[sc]) == 0:
                    cls.log.debug(
                        "\t No hosts within this subcategory. Moving on.")
                    continue
                for x, y in ac[sc].iteritems():
                    ip = x
                    try:
                        instance_id = y['instance_id']
                    except KeyError:
                        cls.log.info(
                            "Not able to associate an Instance ID with the Private DNS Entry: {}."
                            .format(ip))
                        continue
                    cls.inventory_nodes[category].append(x)
                    cls.id_to_ip_map[instance_id] = x
                    cls.log.debug("I just added {} to the {} category".format(
                        ip, category))
                    cls.known_instances[instance_id] = ip
                    cls.log.debug(
                        "The Instance ID {} has been tied to the Private DNS Entry: {}"
                        .format(instance_id, ip))

    @classmethod
    def _determine_region_name(cls):
        """
        Queries the metadata service to determine the current Availability Zone.
        Extrapolates the region based on the AZ returned.
        """
        resp = requests.get(
            'http://169.254.169.254/latest/meta-data/placement/availability-zone'
        )
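        # Dropping the final character turns an AZ into its region,
        # e.g. 'us-east-1a' -> 'us-east-1'.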
        return resp.text[:-1]

    @classmethod
    def _determine_local_instance_id(cls):
        """
        Queries the metadata service to determine the local instance ID
        """
        resp = requests.get(
            'http://169.254.169.254/latest/meta-data/instance-id')
        return resp.text

    @classmethod
    def _grab_all_instances(cls):
        """
        Generator around an ec2.describe_instances() call.
        Uses a filter to narrow down results.
        """
        filters = [{
            "Name": "tag:aws:cloudformation:stack-id",
            "Values": [InventoryConfig.stack_id]
        }]
        all_instances = cls.ec2.describe_instances(
            Filters=filters)['Reservations']

        i = 0
        while i < len(all_instances):
            j = 0
            while j < len(all_instances[i]['Instances']):
                yield all_instances[i]['Instances'][j]
                j += 1
            i += 1

    @classmethod
    def _grab_local_tags(cls):
        """
        Grabs the Cloudformation-set tags on the local instance.
        Dependent on the results of _determine_local_instance_id()
        """
        ec2 = boto3.resource('ec2', cls.region_name)
        local_instance = ec2.Instance(cls.instance_id)
        i = 0
        while i < len(local_instance.tags):
            if 'cloudformation' in local_instance.tags[i]['Key']:
                _k = local_instance.tags[i]['Key'].split(':')[2]
                yield {
                    'key': _k.replace('-', '_'),
                    'value': local_instance.tags[i]['Value']
                }
            i += 1
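
The class is driven in a fixed order at startup. A hedged sketch of that order, inferred from the methods above (whether a write happens at bootstrap is an assumption):

def bootstrap_inventory_config():
    """Illustrative startup order only; not part of the original module."""
    InventoryConfig.setup()  # region, instance id, tags, EC2 instance scan
    InventoryConfig.verify_required_sections_exist(
        generate=InventoryConfig.initial_inventory)
    if not InventoryConfig.initial_inventory:
        # Learn which instances the on-disk inventory already tracks.
        InventoryConfig.populate_from_ansible_inventory()
    InventoryConfig.write_ansible_inventory_file(
        init=InventoryConfig.initial_inventory)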
Example 6
class InventoryConfig(object):
    """
    Class to hold all of the configuration related objects / methods
    Methods:
        - setup: Initial class setup.
        - populate_from_ansible_inventory: Populates the known_instances dict w/data from the ansible inventory.
        - _determine_region_name: Determines the region that the cluster is in.
    """
    log = LogUtil.get_root_logger()
    region_name = None
    instance_id = None
    scale = False
    all_instances = {}
    known_instances = {}
    known_instances_iplist = []
    _instance_pattern = 'i-[0-9a-z]+'
    generate_initial_inventory = False
    inventory_file = '/etc/ansible/hosts'
    ansible_playbook_wrapper = "/usr/share/ansible/openshift-ansible/scaleup_wrapper.yml"
    inventory_categories = {
        "master": ["masters", "new_masters"],
        "etcd": ["etcd", "new_etcd"],
        "node": ["nodes", "new_nodes"],
        "provision": ["provision_in_progress"]
    }
    _inventory_node_skel = {
        "master": [],
        "etcd": [],
        "node": [],
        "provision": []
    }
    _asg_node_skel = {"masters": [], "etcd": [], "nodes": [], "provision": []}
    provisioning_hostdefs = {}
    inventory_nodes = copy.deepcopy(_inventory_node_skel)
    inventory_nodes['ids'] = {}
    logical_names = {
        "OpenShiftEtcdASG": "etcd",
        "OpenShiftMasterASG": "masters",
        "OpenShiftNodeASG": "nodes"
    }

    @classmethod
    def setup(cls):
        """
        function to setup the variables initially (populate from inventory, etc)
        """
        cls.log.info("Setting up the InventoryConfig Class")
        cls.region_name = cls._determine_region_name()
        cls.instance_id = cls._determine_local_instance_id()
        cls.ec2 = boto3.client('ec2', cls.region_name)
        for tag in cls._grab_local_tags():
            cls.log.debug(
                "Applying: [{}] / Value [{}] - as a method within the cluster."
                .format(tag['key'], tag['value']))
            setattr(cls, tag['key'], tag['value'])
        for instance in cls._grab_all_instances():
            iid = instance['InstanceId']
            cls.all_instances[iid] = instance
        cls.log.debug("The EC2 API Told me about these instances: {}".format(
            cls.all_instances.keys()))
        cls.log.info("InventoryConfig setup complete!")

    @classmethod
    def verify_required_sections_exist(cls):
        """
        Verifies that the required sections exist within the Inventory.
        Ex: new_(masters|nodes|etcd)
        """
        inventory_config = InventoryConfig.c
        a = cls.inventory_categories
        for x in a:
            for y in a[x]:
                if not inventory_config.has_section(y):
                    inventory_config.add_section(y)
                    if 'provision' not in y:
                        inventory_config.set('OSEv3:children', y)

    @classmethod
    def populate_from_ansible_inventory(cls):
        """
        Populates the InventoryConfig class with data from the existing ansible inventory
        """
        cls.log.info(
            "We're populating the runtime config from data within the Ansible Inventory"
        )
        inventory_config = InventoryConfig.c
        for category in cls.inventory_categories.keys():
            cls.log.debug("Category: {}".format(category))
            if category == 'provision':
                continue
            for subcategory in cls.inventory_categories[category]:
                cls.log.debug("\tSubcategory: {}".format(subcategory))
                for key in inventory_config.options(subcategory):
                    ip = key.split()[0]
                    cls.inventory_nodes[category].append(ip)
                    cls.log.debug("I just added {} to the {} category".format(
                        ip, category))
                    _pattern = re.compile(cls._instance_pattern)
                    _host_kv = inventory_config.get(subcategory, key)
                    if _host_kv is None:
                        _search_string = key
                    else:
                        _search_string = _host_kv
                    # Guard against a non-matching search before calling .group().
                    _match = _pattern.search(_search_string)
                    if _match:
                        _instance_id = _match.group()
                        cls.known_instances[_instance_id] = ip
                        cls.known_instances_iplist.append(ip)
                        cls.log.debug(
                            "The Instance ID {} has been tied to the Private DNS Entry: {}"
                            .format(_instance_id, ip))
                    else:
                        cls.log.debug("No instance ID was found!")

    @classmethod
    def _determine_region_name(cls):
        """
        Queries the metadata service to determine the current Availability Zone.
        Extrapolates the region based on the AZ returned.
        """
        resp = requests.get(
            'http://169.254.169.254/latest/meta-data/placement/availability-zone'
        )
        return resp.text[:-1]

    @classmethod
    def _determine_local_instance_id(cls):
        """
        Queries the metadata service to determine the local instance ID
        """
        resp = requests.get(
            'http://169.254.169.254/latest/meta-data/instance-id')
        return resp.text

    @classmethod
    def _grab_all_instances(cls):
        """
        Generator around an ec2.describe_instances() call.
        Uses a filter to narrow down results.
        """
        filters = [{
            "Name": "tag:aws:cloudformation:stack-id",
            "Values": [InventoryConfig.stack_id]
        }]
        all_instances = cls.ec2.describe_instances(
            Filters=filters)['Reservations']

        i = 0
        while i < len(all_instances):
            j = 0
            while j < len(all_instances[i]['Instances']):
                yield all_instances[i]['Instances'][j]
                j += 1
            i += 1

    @classmethod
    def _grab_local_tags(cls):
        """
        Grabs the Cloudformation-set tags on the local instance.
        Dependent on the results of _determine_local_instance_id()
        """
        ec2 = boto3.resource('ec2', cls.region_name)
        local_instance = ec2.Instance(cls.instance_id)
        i = 0
        while i < len(local_instance.tags):
            if 'cloudformation' in local_instance.tags[i]['Key']:
                _k = local_instance.tags[i]['Key'].split(':')[2]
                yield {
                    'key': _k.replace('-', '_'),
                    'value': local_instance.tags[i]['Value']
                }
            i += 1
Example 7
class InventoryScaling(object):
    """
    Class to facilitate scaling activities in the Cluster's Auto Scaling Groups.
    """
    log = LogUtil.get_root_logger()
    _incoming_instances = copy.deepcopy(InventoryConfig._inventory_node_skel)
    nodes_to_add = copy.deepcopy(InventoryConfig._asg_node_skel)
    nodes_to_remove = copy.deepcopy(InventoryConfig._asg_node_skel)

    nodes_to_add['combined'] = []
    nodes_to_remove['combined'] = []
    ansible_results = {}

    @classmethod
    def wait_for_api(cls, instance_id_list=None):
        """
        Wait for instances in (class).nodes_to_add to show up in DescribeInstances API Calls. From there,
        we add them to the InventoryConfig.all_instances dictionary. This is necessary to allow the
        instances to be written to the Inventory config file.
        """
        if not instance_id_list:
            instance_id_list = cls.nodes_to_add['combined']

        cls.log.info(
            "[wait_for_api]: Waiting for the EC2 API to return new instances.")
        cls._client = boto3.client('ec2', InventoryConfig.region_name)
        waiter = cls._client.get_waiter('instance_exists')
        waiter.wait(InstanceIds=instance_id_list)

        for instance in cls._fetch_newly_launched_instances_from_api(
                cls.nodes_to_add['combined']):
            cls.log.debug(
                "[{}] has been detected in the API.".format(instance))
            InventoryConfig.all_instances[instance['InstanceId']] = instance
        cls.log.info("[wait_for_api] Complete")

    @classmethod
    def _fetch_newly_launched_instances_from_api(cls, instance_id_list):
        """
        Generator.
        Fetches the newly-launched instances from the API.
        """
        all_instances = cls._client.describe_instances(
            InstanceIds=instance_id_list)['Reservations']
        i = 0
        while i < len(all_instances):
            j = 0
            while j < len(all_instances[i]['Instances']):
                yield all_instances[i]['Instances'][j]
                j += 1
            i += 1

    @classmethod
    def process_pipeline(cls):
        """
        ClassMethod that
            - prunes the config, removing nodes that are terminating.
            - adds nodes to the config that just launched
        """
        cls.log.info("We're processing the scaling pipeline")
        # Remove the nodes (from config) that are terminating.
        if cls.nodes_to_remove['combined']:
            cls.log.info(
                "We have the following nodes to remove from the inventory:")
            cls.log.info("{}".format(cls.nodes_to_remove['combined']))
            for category in cls.nodes_to_remove.keys():
                if category == 'combined':
                    continue
                cls.remove_node_from_section(cls.nodes_to_remove[category],
                                             category)
        else:
            cls.log.info("No nodes were found to remove from the inventory.")

        # Add the nodes that are launching.
        if cls.nodes_to_add['combined']:
            cls.log.info(
                "We have the following nodes to add to the inventory:")
            cls.log.info("{}".format(
                [x.split()[0] for x in cls.nodes_to_add['combined']]))
            for category in cls.nodes_to_add.keys():
                if category == 'combined':
                    continue
                cls._incoming_instances[category] = [
                    x.split()[0] for x in cls.nodes_to_add[category]
                ]
                cls.log.debug("Adding nodes {} to the {} category".format(
                    cls._incoming_instances[category], category))
                cls.add_nodes_to_section(cls.nodes_to_add[category], category)
            cls.log.info("Complete!")
        else:
            cls.log.info("No nodes were found to add to the inventory.")

    @classmethod
    def add_nodes_to_section(cls,
                             node_list,
                             category,
                             fluff=True,
                             migrate=False):
        """
        Adds a node (private IP) to a config section
        """
        c = InventoryConfig.c
        if not migrate:
            _provisioning_section = InventoryConfig.inventory_categories[
                'provision'][0]
            if not c.has_section(_provisioning_section):
                c.set('OSEv3:children', _provisioning_section)
            if not c.has_section('new_' + category):
                if category == 'provision':
                    pass
                else:
                    c.set('OSEv3:children', 'new_' + category)

            for n in node_list:
                ip = n.split()[0]
                if ip in InventoryConfig.known_instances_iplist:
                    continue
                if fluff:
                    _new_node_section = 'new_' + category
                else:
                    _new_node_section = category

                if not c.has_option(_provisioning_section, ip):
                    c.set(_provisioning_section, ip)
                if not c.has_option(_new_node_section, n):
                    c.set(_new_node_section, n)
        else:
            for n in node_list:
                c.set(category, n)

    @classmethod
    def remove_node_from_section(cls, node, category, migrate=False):
        """
        ClassMethod to remove a list of nodes from a list of categories within the config file.
        """
        c = InventoryConfig.c
        migration_removed = []
        categories = [category, '{}_{}'.format('new', category)]
        if migrate:
            del categories[categories.index(category)]
        categories += InventoryConfig.inventory_categories['provision']
        for cat in categories:
            for n in node:
                if 'provision' in cat:
                    c.remove_option(cat, n)
                    continue
                for idx in c.options(cat):
                    if idx.split()[0] == n:
                        if migrate:
                            full_idx = c.get(cat, idx)
                            if full_idx is None:
                                full_idx = idx
                            migration_removed.append(full_idx)
                        c.remove_option(cat, idx)
                        continue
        if migrate:
            return migration_removed

    @classmethod
    def migrate_nodes_between_section(cls, node, category):
        """
        Wrapper to migrate successful nodes between new_{category} and {category}
        labels within the Ansible inventory. Additionally removes node from the
        provisioning category.
        """
        addlist = cls.remove_node_from_section(node, category, migrate=True)
        if 'master' in category:
            _ = cls.remove_node_from_section(node, 'nodes', migrate=True)
        cls.add_nodes_to_section(addlist, category, migrate=True)
        cls.log.info(
            "Nodes: {} have been permanately added to the Inventory under the {} category"
            .format(node, category))
        cls.log.info(
            "They've additionally been removed from the provision_in_progress category"
        )

    @classmethod
    def process_playbook_json_output(cls, jout_file, category):
        """
        Processes the output from the ansible playbook run and
        determines what hosts failed / were unreachable / succeeded.

        The results are put in (Class).ansible_results, keyed by category name.
        """
        # The json_end_idx reference below is important. The playbook run is in json output,
        # however the text we're opening here is a mix of free-text and json.
        # it's formatted like this.
        #   <optional> free text
        #   Giant Glob of JSON
        #   <optional> free text.
        # The json_end_idx variable in this function defines the end of the json.
        # Without it, JSON parsing will fail.
        dt = datetime.datetime.now()
        with open(jout_file, 'r') as f:
            all_output = f.readlines()
        if len(all_output) > 1:
            json_start_idx = all_output.index('{\n')
            json_end_idx, _ = max(enumerate(all_output),
                                  key=operator.itemgetter(1))
        else:
            if len(all_output) == 1:
                cls.log.error("ansible output:")
                cls.log.error(all_output[0])
            else:
                cls.log.error("ansible produced no output")
            raise Exception('Failed to parse ansible output')

        j = json.loads(''.join(all_output[json_start_idx:json_end_idx +
                                          1]))['stats']
        unreachable = []
        failed = []
        succeeded = []
        if 'localhost' in j.keys():
            del j['localhost']
        for h in j.keys():
            if j[h]['unreachable'] != 0:
                unreachable.append(h)
            elif j[h]['failures'] != 0:
                failed.append(h)
            else:
                succeeded.append(h)
        # Pruning down to category only.
        cat_results = {
            'succeeded':
            [x for x in succeeded if x in cls._incoming_instances[category]],
            'failed':
            [x for x in failed if x in cls._incoming_instances[category]],
            'unreachable':
            [x for x in unreachable if x in cls._incoming_instances[category]]
        }
        cls.ansible_results[category] = cat_results
        cls.log.info("- [{}] playbook run results: {}".format(
            category, cat_results))
        final_logfile = "/var/log/aws-quickstart-openshift-scaling.{}-{}-{}T{}{}".format(
            dt.year, dt.month, dt.day, dt.hour, dt.minute)
        os.rename(jout_file, final_logfile)
        cls.log.info("The json output logfile has been moved to %s" %
                     (final_logfile))
#!/usr/bin/env python
import argparse
import os
import subprocess
import tempfile
import shlex
import time
import sys
from aws_openshift_quickstart.utils import *
from aws_openshift_quickstart.logger import LogUtil

LogUtil.set_log_handler('/var/log/openshift-quickstart-scaling.log')
log = LogUtil.get_root_logger()


def generate_inital_inventory_nodes(write_hosts_to_temp=False):
    """
    Generates the initial ansible inventory. Instances only.
    """

    # TODO: Add debugging statements
    def _varsplit(filename):
        if not os.path.exists(filename):
            return {}
        if os.path.getsize(filename) == 1:
            return {}
        _vs = {}
        with open(filename, 'r') as fo:
            varlines = fo.readlines()
        for l in varlines:
            try:
                l_stripped = l.strip('\n')
 def __init__(self, json_doc):
     self.log = LogUtil.get_root_logger()
     self._instances = {'list': [], "scaling": []}
     self._asg = boto3.client('autoscaling', InventoryConfig.region_name)
     self.name = json_doc['AutoScalingGroupName']
     self.private_ips = list()
     self.scaling_events = list()
     self.node_hostdefs = dict()
     self.scale_in_progress_instances = {'terminate': [], 'launch': []}
     self.cooldown = json_doc['DefaultCooldown']
     self._cooldown_upperlimit = self.cooldown * 3
     self.scale_override = False
     self.logical_name = None
     self.elb_name = None
     self.stack_id = None
     self.logical_id = None
     if self._cooldown_upperlimit <= 300:
         self._cooldown_upperlimit = 300
     for tag in self._grab_tags(json_doc['Tags']):
         self.__dict__[tag['key']] = tag['value']
     self.in_openshift_cluster = self._determine_cluster_membership()
     if self.in_openshift_cluster:
         self.openshift_config_category = self._determine_openshift_category(self.logical_id)
         # Set the logical_name
         self.logical_name = InventoryConfig.logical_names[self.logical_id]
         # Sanity check to verify they're in the API.
         # - and populate the InventoryConfig.all_instances dict as a result.
         # - working around edge cases.
         ilist = [i['InstanceId'] for i in json_doc['Instances']]
         InventoryScaling.wait_for_api(instance_id_list=ilist)
         # Grab instances
         for instance in self._grab_instance_metadata(json_doc['Instances']):
             self._instances[instance.InstanceId] = instance
             self._instances['list'].append(instance.InstanceId)
             self.private_ips += instance.private_ips
         # Grab scaling events. Anything newer than (self.cooldown * 3).
         # However, only do so if we're not populating the initial inventory.
         if not InventoryConfig.initial_inventory:
             for scaling_event in self._grab_current_scaling_events():
                 self.scaling_events.append(scaling_event)
                 # Skip launch events for instances already in the config; this
                 # compensates for the self._cooldown_upperlimit window.
                 if (scaling_event.event_type == 'launch') and (
                         scaling_event.instance in InventoryConfig.known_instances.keys()):
                     continue
                 if (scaling_event.event_type == 'launch') and (
                         scaling_event.instance in self.scale_in_progress_instances['terminate']):
                     continue
                 self.scale_in_progress_instances[scaling_event.event_type].append(scaling_event.instance)
                 self._instances['scaling'].append(scaling_event.instance)
             for instance in self._instances['list']:
                 # Sanity check.
                 # - If the instance is neither in the known_instances list nor defined in a recent scaling event,
                 #   but is in the ASG (we don't know about it otherwise)
                 # -- Add it to the scale_in_progress list, and set scale_override to True, so a scale-up occurs.
                 #    (See: scaler.scale_
                 if (instance not in InventoryConfig.known_instances.keys()) and (
                         instance not in self._instances['scaling']):
                     self.scale_in_progress_instances['launch'].append(instance)
                     self.scale_override = True
         # Grab Inventory host definitions
         for combined_hostdef in self.generate_asg_node_hostdefs():
             instance_id, hostdef = combined_hostdef
             InventoryConfig.id_to_ip_map[instance_id] = hostdef['ip_or_dns']
             del hostdef['ip_or_dns']
             InventoryConfig.provisioning_hostdefs[instance_id] = hostdef
             self.node_hostdefs.update(hostdef)
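
The hostdef handled in this loop is a small dict whose exact shape is only partly visible in the excerpt. A hedged illustration consistent with the bookkeeping above (values invented):

# One 'ip_or_dns' key is peeled off into id_to_ip_map; the remaining
# hostname-keyed entry is what node_hostdefs.update() merges in.
example_hostdef = {
    'ip_or_dns': 'ip-10-0-1-5.ec2.internal',
    'ip-10-0-1-5.ec2.internal': {'instance_id': 'i-0abc1234567890def'},
}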