Example #1
0
 def _setup_test_asg_to_be_deleted(self):
     """
     Create a test ASG behind the 'my-lb' ELB and tag it for deletion.

     Sets three attributes on ``self``:
         test_asg_name: name of the created ASG.
         test_autoscale: the autoscale connection used to create it.
         test_asg: the ASG as fetched back from that connection after tagging.
     """
     # pylint: disable=attribute-defined-outside-init
     self.test_asg_name = asg_name = "test-asg-random-tags"
     self.test_autoscale = autoscale_conn = boto.connect_autoscale()

     # The launch configuration has to be registered before the group that references it.
     config = boto.ec2.autoscale.LaunchConfiguration(
         name='my-launch_config',
         image_id='my-ami',
         key_name='my_key_name',
         security_groups=['my_security_groups'],
     )
     autoscale_conn.create_launch_configuration(config)

     group = boto.ec2.autoscale.AutoScalingGroup(
         group_name=asg_name,
         load_balancers=['my-lb'],
         availability_zones=['us-east-1a', 'us-east-1b'],
         launch_config=config,
         min_size=4,
         max_size=8,
         connection=autoscale_conn,
     )
     # The ELB named in load_balancers is created before the group itself is created.
     create_elb('my-lb')
     autoscale_conn.create_auto_scaling_group(group)

     # NOTE(review): the second argument (0) is presumably the delay before deletion -
     # confirm against ec2.tag_asg_for_deletion.
     ec2.tag_asg_for_deletion(asg_name, 0)

     # Re-fetch the group so the stored object reflects the state after tagging.
     fetched_groups = autoscale_conn.get_all_groups([asg_name])
     self.test_asg = fetched_groups[0]
Example #2
0
 def _setup_test_asg_to_be_deleted(self):
     """
     Build a fixture ASG and mark it for deletion.

     After this runs, ``self.test_asg_name`` holds the ASG name,
     ``self.test_autoscale`` holds the autoscale connection, and
     ``self.test_asg`` holds the ASG looked up by name once it has been tagged.
     """
     # pylint: disable=attribute-defined-outside-init
     self.test_asg_name = "test-asg-random-tags"
     self.test_autoscale = boto.connect_autoscale()
     autoscale = boto.ec2.autoscale

     # Register the launch configuration that the group will reference.
     launch_options = {
         'name': 'my-launch_config',
         'image_id': 'my-ami',
         'key_name': 'my_key_name',
         'security_groups': ['my_security_groups'],
     }
     test_launch_config = autoscale.LaunchConfiguration(**launch_options)
     self.test_autoscale.create_launch_configuration(test_launch_config)

     group_options = {
         'group_name': self.test_asg_name,
         'load_balancers': ['my-lb'],
         'availability_zones': ['us-east-1a', 'us-east-1b'],
         'launch_config': test_launch_config,
         'min_size': 4,
         'max_size': 8,
         'connection': self.test_autoscale,
     }
     test_group = autoscale.AutoScalingGroup(**group_options)

     # Create the ELB the group points at, then the group itself.
     create_elb('my-lb')
     self.test_autoscale.create_auto_scaling_group(test_group)

     # NOTE(review): 0 is presumably a deletion delay - verify against ec2.tag_asg_for_deletion.
     ec2.tag_asg_for_deletion(self.test_asg_name, 0)

     # Look the group up again by name and keep the first (only requested) match.
     matching_groups = self.test_autoscale.get_all_groups([self.test_asg_name])
     self.test_asg = matching_groups[0]
Example #3
0
def _red_black_deploy(
        new_cluster_asgs, baseline_cluster_asgs,
        secs_before_old_asgs_disabled=DISABLE_OLD_ASG_WAIT_TIME
):
    """
    Takes two dicts of autoscale groups, new and baseline.
    Each dict key is a cluster name.
    Each dict value is a list of ASGs for that cluster.
    Enables the new ASGs, then disables the old ASGs.

    Red/black deploy refers to:
        - Existing ASG is "red", meaning active.
        - New ASG begins as "black", meaning inactive.
        - The new ASG is added to the ELB, making it "red".
            - The baseline and new ASGs are now existing as "red/red".
        - The baseline ASG is removed from the ELB.
            - As traffic has ceased to be directed to the baseline ASG, it becomes "black".

    Workflow:
        - enable new ASGs (if any step fails, disable every new ASG again and stop)
        - wait for instances to be healthy in the load balancer
        - sleep for secs_before_old_asgs_disabled
        - ensure the new ASGs are not pending delete or disabled
        - tag and disable current asgs

    Neither input dict is modified; the returned dicts are built from deep copies.

    Args:
        new_cluster_asgs (dict): Lists of new ASGs to be added to the ELB, keyed by cluster.
        baseline_cluster_asgs (dict): Lists of existing ASGs already added to the ELB, keyed by cluster.
        secs_before_old_asgs_disabled (number): Seconds to sleep (via time.sleep) between the ELB
            health check wait and the final check of the new ASGs.

    Returns:
        A 3-tuple of:
        success (bool): True if red/black operation succeeded, else False.
        asgs_enabled (dict): List of ASGs that are added to the ELB, keyed by cluster.
        asgs_disabled (dict): List of ASGs that are removed from the ELB, keyed by cluster.
    """
    # Bookkeeping starts from the assumed initial state: baseline ASGs enabled, new ASGs disabled.
    asgs_enabled = copy.deepcopy(baseline_cluster_asgs)
    asgs_disabled = copy.deepcopy(new_cluster_asgs)

    def _move_asg(cluster, asg, source, destination):
        """
        Record the ASG under destination instead of source.

        Deliberately tolerant: the ASG may be missing from source or already present in
        destination. This happens when rolling back an ASG whose enable call failed before
        the bookkeeping was updated, and must not raise in the middle of a rollback.
        """
        source_asgs = source.get(cluster, [])
        if asg in source_asgs:
            source_asgs.remove(asg)
        destination_asgs = destination.setdefault(cluster, [])
        if asg not in destination_asgs:
            destination_asgs.append(asg)

    def _move_asg_from_disabled_to_enabled(cluster, asg):
        """
        Updates the bookkeeping only: ASG goes from the disabled dict to the enabled dict.
        """
        _move_asg(cluster, asg, source=asgs_disabled, destination=asgs_enabled)

    def _move_asg_from_enabled_to_disabled(cluster, asg):
        """
        Updates the bookkeeping only: ASG goes from the enabled dict to the disabled dict.
        """
        _move_asg(cluster, asg, source=asgs_enabled, destination=asgs_disabled)

    def _enable_cluster_asg(cluster, asg):
        """
        Enables the ASG and, if that succeeds, records it as enabled.
        """
        enable_asg(asg)
        _move_asg_from_disabled_to_enabled(cluster, asg)

    def _disable_cluster_asg(cluster, asg):
        """
        Disables the ASG and, if that succeeds, records it as disabled.
        """
        disable_asg(asg)
        _move_asg_from_enabled_to_disabled(cluster, asg)

    def _disable_clustered_asgs(clustered_asgs, failure_msg):
        """
        Disable all the ASGs in the lists, keyed by cluster.

        Best-effort: a failure to disable one ASG is logged (failure_msg takes the ASG as its
        single %s argument) and the remaining ASGs are still attempted.
        """
        for cluster, asgs in six.iteritems(clustered_asgs):
            for asg in asgs:
                try:
                    _disable_cluster_asg(cluster, asg)
                except:  # pylint: disable=bare-except
                    LOG.warning(failure_msg, asg, exc_info=True)

    elbs_to_monitor = []
    newly_enabled_asgs = defaultdict(list)
    for cluster, asgs in six.iteritems(new_cluster_asgs):
        for asg in asgs:
            try:
                _enable_cluster_asg(cluster, asg)
                elbs_to_monitor.extend(elbs_for_asg(asg))
                newly_enabled_asgs[cluster].append(asg)
            except:  # pylint: disable=bare-except
                LOG.error("Error enabling ASG '%s'. Disabling traffic to all new ASGs.", asg, exc_info=True)
                # Disable the ASG which failed first. It is not in newly_enabled_asgs yet, and it may
                # or may not have actually been enabled, so this goes through the best-effort path:
                # a failure here must not prevent the rest of the rollback or the False return.
                _disable_clustered_asgs(
                    {cluster: [asg]},
                    "Unable to disable ASG '%s' after failure."
                )
                # Then disable any new other ASGs that have been newly enabled.
                _disable_clustered_asgs(
                    newly_enabled_asgs,
                    "Unable to disable ASG '%s' after failure."
                )
                return (False, asgs_enabled, asgs_disabled)

    LOG.info(
        "New ASGs %s are active and will be available after passing the healthchecks.",
        dict(newly_enabled_asgs)
    )

    # Wait for all instances to be in service in all ELBs.
    try:
        ec2.wait_for_healthy_elbs(elbs_to_monitor, 600)
    except:  # pylint: disable=bare-except
        LOG.info("Some ASGs are failing ELB health checks. Disabling traffic to all new ASGs.", exc_info=True)
        _disable_clustered_asgs(
            newly_enabled_asgs,
            "Unable to disable ASG '%s' after waiting for healthy ELBs."
        )
        return (False, asgs_enabled, asgs_disabled)

    # Add a sleep delay here to wait and see how the new ASGs react to traffic.
    # A flawed release would likely make the new ASGs fail the health checks below
    # and, if any new ASGs fail the health checks, the old ASGs would *not* be disabled.
    time.sleep(secs_before_old_asgs_disabled)

    # Ensure the new ASGs are still healthy and not pending delete before disabling the old ASGs.
    # Note that on failure here the new ASGs are left as they are; only the old ASGs are kept enabled.
    for cluster, asgs in six.iteritems(newly_enabled_asgs):
        for asg in asgs:
            err_msg = None
            if is_asg_pending_delete(asg):
                err_msg = "New ASG '{}' is pending delete.".format(asg)
            elif not is_asg_enabled(asg):
                err_msg = "New ASG '{}' is not enabled.".format(asg)
            if err_msg:
                LOG.error("%s Aborting disabling of old ASGs.", err_msg)
                return (False, asgs_enabled, asgs_disabled)

    LOG.info("New ASGs have passed the healthchecks. Now disabling old ASGs.")

    for cluster, asgs in six.iteritems(baseline_cluster_asgs):
        for asg in asgs:
            try:
                baseline_asg_is_enabled = is_asg_enabled(asg)
            except ASGDoesNotExistException:
                # This operation should not fail if one of the baseline ASGs was removed during the deployment process
                LOG.info(
                    "ASG %s in cluster %s no longer exists, removing it from the enabled cluster list",
                    asg, cluster
                )
                _move_asg_from_enabled_to_disabled(cluster, asg)
            else:
                if baseline_asg_is_enabled:
                    try:
                        _disable_cluster_asg(cluster, asg)
                    except:  # pylint: disable=bare-except
                        LOG.warning("Unable to disable ASG '%s' after enabling new ASGs.", asg, exc_info=True)
                elif asg in asgs_enabled[cluster]:
                    # If the asg is not enabled, but we have it in the enabled list remove it. This may occur by
                    # pulling from 2 different sources of truth at different intervals. The asg could have been
                    # disabled in the intervening time.
                    _move_asg_from_enabled_to_disabled(cluster, asg)

            # Every baseline ASG is tagged for deletion, whether or not disabling it succeeded above.
            try:
                ec2.tag_asg_for_deletion(asg)
            except ASGDoesNotExistException:
                LOG.info("Unable to tag ASG '%s' as it no longer exists, skipping.", asg)

    return (True, asgs_enabled, asgs_disabled)
Example #4
0
def _red_black_deploy(
        new_cluster_asgs, baseline_cluster_asgs,
        secs_before_old_asgs_disabled=DISABLE_OLD_ASG_WAIT_TIME
):
    """
    Takes two dicts of autoscale groups, new and baseline.
    Each dict key is a cluster name.
    Each dict value is a list of ASGs for that cluster.
    Enables the new ASGs, then disables the old ASGs.

    Red/black deploy refers to:
        - Existing ASG is "red", meaning active.
        - New ASG begins as "black", meaning inactive.
        - The new ASG is added to the ELB, making it "red".
            - The baseline and new ASGs are now existing as "red/red".
        - The baseline ASG is removed from the ELB.
            - As traffic has ceased to be directed to the baseline ASG, it becomes "black".

    Workflow:
        - enable new ASGs (if any step fails, disable every new ASG again and stop)
        - wait for instances to be healthy in the load balancer
        - sleep for secs_before_old_asgs_disabled
        - ensure the new ASGs are not pending delete or disabled
        - tag and disable current asgs

    Neither input dict is modified; the returned dicts are built from deep copies.

    Args:
        new_cluster_asgs (dict): Lists of new ASGs to be added to the ELB, keyed by cluster.
        baseline_cluster_asgs (dict): Lists of existing ASGs already added to the ELB, keyed by cluster.
        secs_before_old_asgs_disabled (number): Seconds to sleep (via time.sleep) between the ELB
            health check wait and the final check of the new ASGs.

    Returns:
        A 3-tuple of:
        success (bool): True if red/black operation succeeded, else False.
        asgs_enabled (dict): List of ASGs that are added to the ELB, keyed by cluster.
        asgs_disabled (dict): List of ASGs that are removed from the ELB, keyed by cluster.
    """
    # Bookkeeping starts from the assumed initial state: baseline ASGs enabled, new ASGs disabled.
    asgs_enabled = copy.deepcopy(baseline_cluster_asgs)
    asgs_disabled = copy.deepcopy(new_cluster_asgs)

    def _move_asg(cluster, asg, source, destination):
        """
        Record the ASG under destination instead of source.

        Deliberately tolerant: the ASG may be missing from source or already present in
        destination. This happens when rolling back an ASG whose enable call failed before
        the bookkeeping was updated, and must not raise in the middle of a rollback.
        """
        source_asgs = source.get(cluster, [])
        if asg in source_asgs:
            source_asgs.remove(asg)
        destination_asgs = destination.setdefault(cluster, [])
        if asg not in destination_asgs:
            destination_asgs.append(asg)

    def _move_asg_from_disabled_to_enabled(cluster, asg):
        """
        Updates the bookkeeping only: ASG goes from the disabled dict to the enabled dict.
        """
        _move_asg(cluster, asg, source=asgs_disabled, destination=asgs_enabled)

    def _move_asg_from_enabled_to_disabled(cluster, asg):
        """
        Updates the bookkeeping only: ASG goes from the enabled dict to the disabled dict.
        """
        _move_asg(cluster, asg, source=asgs_enabled, destination=asgs_disabled)

    def _enable_cluster_asg(cluster, asg):
        """
        Enables the ASG and, if that succeeds, records it as enabled.
        """
        enable_asg(asg)
        _move_asg_from_disabled_to_enabled(cluster, asg)

    def _disable_cluster_asg(cluster, asg):
        """
        Disables the ASG and, if that succeeds, records it as disabled.
        """
        disable_asg(asg)
        _move_asg_from_enabled_to_disabled(cluster, asg)

    def _disable_clustered_asgs(clustered_asgs, failure_msg):
        """
        Disable all the ASGs in the lists, keyed by cluster.

        Best-effort: a failure to disable one ASG is logged (failure_msg is a str.format
        template taking the ASG as its single argument) and the remaining ASGs are still attempted.
        """
        # .items() rather than .iteritems(): the latter does not exist on Python 3 dicts.
        for cluster, asgs in clustered_asgs.items():
            for asg in asgs:
                try:
                    _disable_cluster_asg(cluster, asg)
                except:  # pylint: disable=bare-except
                    LOG.warning(failure_msg.format(asg))

    elbs_to_monitor = []
    newly_enabled_asgs = defaultdict(list)
    for cluster, asgs in new_cluster_asgs.items():
        for asg in asgs:
            try:
                _enable_cluster_asg(cluster, asg)
                elbs_to_monitor.extend(elbs_for_asg(asg))
                newly_enabled_asgs[cluster].append(asg)
            except:  # pylint: disable=bare-except
                LOG.error("Error enabling ASG '{}'. Disabling traffic to all new ASGs.".format(asg))
                LOG.error(traceback.format_exc())
                # Disable the ASG which failed first. It is not in newly_enabled_asgs yet, and it may
                # or may not have actually been enabled, so this goes through the best-effort path:
                # a failure here must not prevent the rest of the rollback or the False return.
                _disable_clustered_asgs(
                    {cluster: [asg]},
                    "Unable to disable ASG '{}' after failure."
                )
                # Then disable any new other ASGs that have been newly enabled.
                _disable_clustered_asgs(
                    newly_enabled_asgs,
                    "Unable to disable ASG '{}' after failure."
                )
                return (False, asgs_enabled, asgs_disabled)

    LOG.info("New ASGs {} are active and will be available after passing the healthchecks.".format(
        dict(newly_enabled_asgs)
    ))

    # Wait for all instances to be in service in all ELBs.
    try:
        ec2.wait_for_healthy_elbs(elbs_to_monitor, 600)
    except:  # pylint: disable=bare-except
        LOG.info("Some ASGs are failing ELB health checks. Disabling traffic to all new ASGs.")
        _disable_clustered_asgs(
            newly_enabled_asgs,
            "Unable to disable ASG '{}' after waiting for healthy ELBs."
        )
        return (False, asgs_enabled, asgs_disabled)

    # Add a sleep delay here to wait and see how the new ASGs react to traffic.
    # A flawed release would likely make the new ASGs fail the health checks below
    # and, if any new ASGs fail the health checks, the old ASGs would *not* be disabled.
    time.sleep(secs_before_old_asgs_disabled)

    # Ensure the new ASGs are still healthy and not pending delete before disabling the old ASGs.
    # Note that on failure here the new ASGs are left as they are; only the old ASGs are kept enabled.
    for cluster, asgs in newly_enabled_asgs.items():
        for asg in asgs:
            err_msg = None
            if is_asg_pending_delete(asg):
                err_msg = "New ASG '{}' is pending delete.".format(asg)
            elif not is_asg_enabled(asg):
                err_msg = "New ASG '{}' is not enabled.".format(asg)
            if err_msg:
                LOG.error("{} Aborting disabling of old ASGs.".format(err_msg))
                return (False, asgs_enabled, asgs_disabled)

    LOG.info("New ASGs have passed the healthchecks. Now disabling old ASGs.")

    for cluster, asgs in baseline_cluster_asgs.items():
        for asg in asgs:
            try:
                baseline_asg_is_enabled = is_asg_enabled(asg)
            except ASGDoesNotExistException:
                # This operation should not fail if one of the baseline ASGs was removed during the deployment process
                LOG.info("ASG {asg} in cluster {cluster} no longer exists, removing it from the enabled cluster list"
                         .format(asg=asg, cluster=cluster))
                _move_asg_from_enabled_to_disabled(cluster, asg)
            else:
                if baseline_asg_is_enabled:
                    try:
                        _disable_cluster_asg(cluster, asg)
                    except:  # pylint: disable=bare-except
                        LOG.warning("Unable to disable ASG '{}' after enabling new ASGs.".format(asg))
                elif asg in asgs_enabled[cluster]:
                    # If the asg is not enabled, but we have it in the enabled list remove it. This may occur by
                    # pulling from 2 different sources of truth at different intervals. The asg could have been
                    # disabled in the intervening time.
                    _move_asg_from_enabled_to_disabled(cluster, asg)

            # Every baseline ASG is tagged for deletion, whether or not disabling it succeeded above.
            try:
                ec2.tag_asg_for_deletion(asg)
            except ASGDoesNotExistException:
                LOG.info("Unable to tag ASG '{}' as it no longer exists, skipping.".format(asg))

    return (True, asgs_enabled, asgs_disabled)