def get_failover_LB_flow(self, amps, lb):
        """Build the flow that fails over a load balancer.

        1. Validate the VIP port is correct and present.
        2. Build a replacement amphora.
        3. Delete the failed amphora.
        4. Configure the replacement amphora listeners.
        5. Configure VRRP for the listeners.
        6. Build the second replacement amphora.
        7. Delete the second failed amphora.
        8. Delete any extraneous amphora.
        9. Configure the listeners on the new amphorae.
        10. Configure the VRRP on the new amphorae.
        11. Reload the listener configurations to pick up VRRP changes.
        12. Mark the load balancer back to ACTIVE.

        Steps 5-7 and 9-11 are only added for the active/standby topology.

        :param amps: Amphora dicts currently attached to the load balancer.
                     May be empty or None. The passed-in list is not
                     modified; a private copy is consumed instead.
        :param lb: Load balancer dict. The topology is read from
                   lb[constants.FLAVOR][constants.LOADBALANCER_TOPOLOGY]
                   and the ID (for logging) from lb[constants.ID].
        :returns: The flow that will provide the failover.
        """
        lb_topology = lb[constants.FLAVOR][constants.LOADBALANCER_TOPOLOGY]

        # Work on a private copy so popping amphorae below does not alter
        # the list owned by the caller. None is treated as "no amphorae".
        amps = list(amps) if amps else []

        def _log_failover_target(amp):
            """Log the identifying details of an amphora being replaced."""
            role = amp.get(constants.ROLE)
            if role in (constants.ROLE_MASTER, constants.ROLE_BACKUP):
                role_label = 'master_or_backup'
            elif role == constants.ROLE_STANDALONE:
                role_label = 'standalone'
            elif role is None:
                role_label = 'spare'
            else:
                role_label = 'undefined'
            LOG.info(
                "Performing failover for amphora: %s", {
                    "id": amp.get(constants.ID),
                    "load_balancer_id": lb.get(constants.ID),
                    "lb_network_ip": amp.get(constants.LB_NETWORK_IP),
                    "compute_id": amp.get(constants.COMPUTE_ID),
                    "role": role_label
                })

        # Pick one amphora to be failed over if any exist.
        failed_amp = amps.pop() if amps else None

        failover_LB_flow = linear_flow.Flow(
            constants.FAILOVER_LOADBALANCER_FLOW)

        # Revert LB to provisioning_status ERROR if this flow goes wrong
        failover_LB_flow.add(
            lifecycle_tasks.LoadBalancerToErrorOnRevertTask(
                requires=constants.LOADBALANCER))

        # Setup timeouts for our requests to the amphorae
        # NOTE(review): the "rety" spelling presumably matches the name the
        # option is registered under; confirm before renaming it.
        timeout_dict = {
            constants.CONN_MAX_RETRIES:
            CONF.haproxy_amphora.active_connection_max_retries,
            constants.CONN_RETRY_INTERVAL:
            CONF.haproxy_amphora.active_connection_rety_interval
        }

        if failed_amp:
            _log_failover_target(failed_amp)

            failover_LB_flow.add(
                database_tasks.MarkAmphoraPendingDeleteInDB(
                    requires=constants.AMPHORA,
                    inject={constants.AMPHORA: failed_amp}))

            failover_LB_flow.add(
                database_tasks.MarkAmphoraHealthBusy(
                    requires=constants.AMPHORA,
                    inject={constants.AMPHORA: failed_amp}))

        # Check that the VIP port exists and is ok
        failover_LB_flow.add(
            network_tasks.AllocateVIPforFailover(
                requires=constants.LOADBALANCER, provides=constants.VIP))

        # Update the database with the VIP information
        failover_LB_flow.add(
            database_tasks.UpdateVIPAfterAllocation(
                requires=(constants.LOADBALANCER_ID, constants.VIP),
                provides=constants.LOADBALANCER))

        # Make sure the SG has the correct rules and re-apply to the
        # VIP port. It is not used on the VIP port, but will help lock
        # the SG as in use.
        failover_LB_flow.add(
            network_tasks.UpdateVIPSecurityGroup(
                requires=constants.LOADBALANCER_ID,
                provides=constants.VIP_SG_ID))

        # The first replacement is a BACKUP for active/standby, otherwise
        # it is the only (STANDALONE) amphora.
        new_amp_role = constants.ROLE_STANDALONE
        if lb_topology == constants.TOPOLOGY_ACTIVE_STANDBY:
            new_amp_role = constants.ROLE_BACKUP

        # Get a replacement amphora and plug all of the networking.
        #
        # Do this early as the compute services have been observed to be
        # unreliable. The community decided the chance that deleting first
        # would open resources for an instance is less likely than the compute
        # service failing to boot an instance for other reasons.
        if failed_amp:
            failed_vrrp_is_ipv6 = False
            if failed_amp.get(constants.VRRP_IP):
                failed_vrrp_is_ipv6 = utils.is_ipv6(
                    failed_amp[constants.VRRP_IP])
            failover_LB_flow.add(
                self.amp_flows.get_amphora_for_lb_failover_subflow(
                    prefix=constants.FAILOVER_LOADBALANCER_FLOW,
                    role=new_amp_role,
                    failed_amp_vrrp_port_id=failed_amp.get(
                        constants.VRRP_PORT_ID),
                    is_vrrp_ipv6=failed_vrrp_is_ipv6))
        else:
            failover_LB_flow.add(
                self.amp_flows.get_amphora_for_lb_failover_subflow(
                    prefix=constants.FAILOVER_LOADBALANCER_FLOW,
                    role=new_amp_role))

        if lb_topology == constants.TOPOLOGY_ACTIVE_STANDBY:
            failover_LB_flow.add(
                database_tasks.MarkAmphoraBackupInDB(
                    name=constants.MARK_AMP_BACKUP_INDB,
                    requires=constants.AMPHORA))

        # Delete the failed amp
        if failed_amp:
            failover_LB_flow.add(
                self.amp_flows.get_delete_amphora_flow(failed_amp))

        # Update the data stored in the flow from the database
        failover_LB_flow.add(
            database_tasks.ReloadLoadBalancer(
                requires=constants.LOADBALANCER_ID,
                provides=constants.LOADBALANCER))

        # Configure the listener(s)
        # We will run update on this amphora again later if this is
        # an active/standby load balancer because we want this amp
        # functional as soon as possible. It must run again to update
        # the configurations for the new peers.
        failover_LB_flow.add(
            amphora_driver_tasks.AmpListenersUpdate(
                name=constants.AMP_LISTENER_UPDATE,
                requires=(constants.LOADBALANCER, constants.AMPHORA),
                inject={constants.TIMEOUT_DICT: timeout_dict}))

        # Bring up the new "backup" amphora VIP now to reduce the outage
        # on the final failover. This dropped the outage from 8-9 seconds
        # to less than one in my lab.
        # This does mean some steps have to be repeated later to reconfigure
        # for the second amphora as a peer.
        if lb_topology == constants.TOPOLOGY_ACTIVE_STANDBY:

            failover_LB_flow.add(
                database_tasks.CreateVRRPGroupForLB(
                    name=new_amp_role + '-' +
                    constants.CREATE_VRRP_GROUP_FOR_LB,
                    requires=constants.LOADBALANCER_ID))

            failover_LB_flow.add(
                network_tasks.GetAmphoraNetworkConfigsByID(
                    name=(new_amp_role + '-' +
                          constants.GET_AMPHORA_NETWORK_CONFIGS_BY_ID),
                    requires=(constants.LOADBALANCER_ID, constants.AMPHORA_ID),
                    provides=constants.FIRST_AMP_NETWORK_CONFIGS))

            failover_LB_flow.add(
                amphora_driver_tasks.AmphoraUpdateVRRPInterface(
                    name=new_amp_role + '-' + constants.AMP_UPDATE_VRRP_INTF,
                    requires=constants.AMPHORA,
                    inject={constants.TIMEOUT_DICT: timeout_dict},
                    provides=constants.FIRST_AMP_VRRP_INTERFACE))

            failover_LB_flow.add(
                amphora_driver_tasks.AmphoraVRRPUpdate(
                    name=new_amp_role + '-' + constants.AMP_VRRP_UPDATE,
                    requires=(constants.LOADBALANCER_ID, constants.AMPHORA),
                    rebind={
                        constants.AMPHORAE_NETWORK_CONFIG:
                        constants.FIRST_AMP_NETWORK_CONFIGS,
                        constants.AMP_VRRP_INT:
                        constants.FIRST_AMP_VRRP_INTERFACE
                    },
                    inject={constants.TIMEOUT_DICT: timeout_dict}))

            failover_LB_flow.add(
                amphora_driver_tasks.AmphoraVRRPStart(
                    name=new_amp_role + '-' + constants.AMP_VRRP_START,
                    requires=constants.AMPHORA,
                    inject={constants.TIMEOUT_DICT: timeout_dict}))

            # Start the listener. This needs to be done here because
            # it will create the required haproxy check scripts for
            # the VRRP deployed above.
            # A "V" or newer amphora-agent will remove the need for this
            # task here.
            # TODO(johnsom) Remove this in the "X" cycle
            failover_LB_flow.add(
                amphora_driver_tasks.ListenersStart(
                    name=new_amp_role + '-' + constants.AMP_LISTENER_START,
                    requires=(constants.LOADBALANCER, constants.AMPHORA)))

            #  #### Work on standby amphora if needed #####

            # From here on task names are prefixed with the MASTER role so
            # they do not collide with the first-pass task names.
            new_amp_role = constants.ROLE_MASTER
            failed_amp = amps.pop() if amps else None

            if failed_amp:
                _log_failover_target(failed_amp)

                failover_LB_flow.add(
                    database_tasks.MarkAmphoraPendingDeleteInDB(
                        name=(new_amp_role + '-' +
                              constants.MARK_AMPHORA_PENDING_DELETE),
                        requires=constants.AMPHORA,
                        inject={constants.AMPHORA: failed_amp}))

                failover_LB_flow.add(
                    database_tasks.MarkAmphoraHealthBusy(
                        name=(new_amp_role + '-' +
                              constants.MARK_AMPHORA_HEALTH_BUSY),
                        requires=constants.AMPHORA,
                        inject={constants.AMPHORA: failed_amp}))

            # Get a replacement amphora and plug all of the networking.
            #
            # Do this early as the compute services have been observed to be
            # unreliable. The community decided the chance that deleting first
            # would open resources for an instance is less likely than the
            # compute service failing to boot an instance for other reasons.
            failover_LB_flow.add(
                self.amp_flows.get_amphora_for_lb_failover_subflow(
                    prefix=(new_amp_role + '-' +
                            constants.FAILOVER_LOADBALANCER_FLOW),
                    role=new_amp_role))

            failover_LB_flow.add(
                database_tasks.MarkAmphoraMasterInDB(
                    name=constants.MARK_AMP_MASTER_INDB,
                    requires=constants.AMPHORA))

            # Delete the failed amp
            if failed_amp:
                failover_LB_flow.add(
                    self.amp_flows.get_delete_amphora_flow(failed_amp))
                failover_LB_flow.add(
                    database_tasks.DisableAmphoraHealthMonitoring(
                        name=(new_amp_role + '-' +
                              constants.DISABLE_AMP_HEALTH_MONITORING),
                        requires=constants.AMPHORA,
                        inject={constants.AMPHORA: failed_amp}))

        # Remove any extraneous amphora
        # Note: This runs in all topology situations.
        #       It should run before the act/stdby final listener update so
        #       that we don't bother attempting to update dead amphorae.
        delete_extra_amps_flow = unordered_flow.Flow(
            constants.DELETE_EXTRA_AMPHORAE_FLOW)
        for amp in amps:
            LOG.debug(
                'Found extraneous amphora %s on load balancer %s. '
                'Deleting.', amp.get(constants.ID), lb.get(constants.ID))
            delete_extra_amps_flow.add(
                self.amp_flows.get_delete_amphora_flow(amp))

        failover_LB_flow.add(delete_extra_amps_flow)

        if lb_topology == constants.TOPOLOGY_ACTIVE_STANDBY:
            # Update the data stored in the flow from the database
            failover_LB_flow.add(
                database_tasks.ReloadLoadBalancer(
                    name=new_amp_role + '-' +
                    constants.RELOAD_LB_AFTER_AMP_ASSOC,
                    requires=constants.LOADBALANCER_ID,
                    provides=constants.LOADBALANCER))

            failover_LB_flow.add(
                database_tasks.GetAmphoraeFromLoadbalancer(
                    name=new_amp_role + '-' + constants.GET_AMPHORAE_FROM_LB,
                    requires=constants.LOADBALANCER_ID,
                    provides=constants.AMPHORAE))

            # Listeners update needs to be run on all amphora to update
            # their peer configurations. So parallelize this with an
            # unordered subflow.
            update_amps_subflow = unordered_flow.Flow(
                constants.UPDATE_AMPS_SUBFLOW)

            # Setup parallel flows for each amp. We don't know the new amp
            # details at flow creation time, so setup a subflow for each
            # amp on the LB, then let the task index into a list of amps
            # to find the amphora it should work on.
            update_amps_subflow.add(
                amphora_driver_tasks.AmphoraIndexListenerUpdate(
                    name=(constants.AMPHORA + '-0-' +
                          constants.AMP_LISTENER_UPDATE),
                    requires=(constants.LOADBALANCER, constants.AMPHORAE),
                    inject={
                        constants.AMPHORA_INDEX: 0,
                        constants.TIMEOUT_DICT: timeout_dict
                    }))
            update_amps_subflow.add(
                amphora_driver_tasks.AmphoraIndexListenerUpdate(
                    name=(constants.AMPHORA + '-1-' +
                          constants.AMP_LISTENER_UPDATE),
                    requires=(constants.LOADBALANCER, constants.AMPHORAE),
                    inject={
                        constants.AMPHORA_INDEX: 1,
                        constants.TIMEOUT_DICT: timeout_dict
                    }))

            failover_LB_flow.add(update_amps_subflow)

            # Configure and enable keepalived in the amphora
            failover_LB_flow.add(
                self.amp_flows.get_vrrp_subflow(new_amp_role + '-' +
                                                constants.GET_VRRP_SUBFLOW,
                                                timeout_dict,
                                                create_vrrp_group=False))

            # #### End of standby ####

            # Reload the listener. This needs to be done here because
            # it will create the required haproxy check scripts for
            # the VRRP deployed above.
            # A "V" or newer amphora-agent will remove the need for this
            # task here.
            # TODO(johnsom) Remove this in the "X" cycle
            failover_LB_flow.add(
                amphora_driver_tasks.AmphoraIndexListenersReload(
                    name=(new_amp_role + '-' +
                          constants.AMPHORA_RELOAD_LISTENER),
                    requires=(constants.LOADBALANCER, constants.AMPHORAE),
                    inject={
                        constants.AMPHORA_INDEX: 1,
                        constants.TIMEOUT_DICT: timeout_dict
                    }))

        # Remove any extraneous ports
        # Note: Nova sometimes fails to delete ports attached to an instance.
        #       For example, if you create an LB with a listener, then
        #       'openstack server delete' the amphora, you will see the vrrp
        #       port attached to that instance will remain after the instance
        #       is deleted.
        # TODO(johnsom) Fix this as part of
        #               https://storyboard.openstack.org/#!/story/2007077

        # Mark LB ACTIVE
        failover_LB_flow.add(
            database_tasks.MarkLBActiveInDB(mark_subobjects=True,
                                            requires=constants.LOADBALANCER))

        return failover_LB_flow
# Example #2
# 0
    def get_amphora_for_lb_failover_subflow(
            self, prefix, role=constants.ROLE_STANDALONE,
            failed_amp_vrrp_port_id=None, is_vrrp_ipv6=False):
        """Build the sub-flow that creates a replacement amphora.

        The resulting linear flow obtains an amphora, creates and attaches
        its VIP base (VRRP) port, records the port details, applies the VIP
        networking on the amphora and finally plugs the member networks.

        :requires: loadbalancer_id, flavor, vip, vip_sg_id, loadbalancer
        :provides: amphora_id, amphora
        :param prefix: The flow name prefix to use on the flow and tasks.
        :param role: The role this amphora will have in the topology.
        :param failed_amp_vrrp_port_id: The base port ID of the failed amp.
        :param is_vrrp_ipv6: True if the base port IP is IPv6.
        :return: A Taskflow sub-flow that will create the amphora.
        """

        def _prefixed(suffix):
            # Every flow/task name in this sub-flow is "<prefix>-<suffix>".
            return prefix + '-' + suffix

        subflow = linear_flow.Flow(
            _prefixed(constants.CREATE_AMP_FOR_FAILOVER_SUBFLOW))

        # Try to allocate or boot an amphora instance (unconfigured)
        get_amp_subflow = self.get_amphora_for_lb_subflow(
            prefix=_prefixed(constants.FAILOVER_LOADBALANCER_FLOW),
            role=role)
        subflow.add(get_amp_subflow)

        # Create the VIP base (aka VRRP) port for the amphora.
        create_base_port = network_tasks.CreateVIPBasePort(
            name=_prefixed(constants.CREATE_VIP_BASE_PORT),
            requires=(constants.VIP, constants.VIP_SG_ID,
                      constants.AMPHORA_ID),
            provides=constants.BASE_PORT)
        subflow.add(create_base_port)

        # Attach the VIP base (aka VRRP) port to the amphora.
        attach_base_port = compute_tasks.AttachPort(
            name=_prefixed(constants.ATTACH_PORT),
            requires=(constants.AMPHORA, constants.PORT),
            rebind={constants.PORT: constants.BASE_PORT})
        subflow.add(attach_base_port)

        # Update the amphora database record with the VIP base port info.
        record_failover_details = database_tasks.UpdateAmpFailoverDetails(
            name=_prefixed(constants.UPDATE_AMP_FAILOVER_DETAILS),
            requires=(constants.AMPHORA, constants.VIP, constants.BASE_PORT))
        subflow.add(record_failover_details)

        # Update the amphora networking for the plugged VIP port
        fetch_network_config = network_tasks.GetAmphoraNetworkConfigsByID(
            name=_prefixed(constants.GET_AMPHORA_NETWORK_CONFIGS_BY_ID),
            requires=(constants.LOADBALANCER_ID, constants.AMPHORA_ID),
            provides=constants.AMPHORAE_NETWORK_CONFIG)
        subflow.add(fetch_network_config)

        # Disable the base (vrrp) port on the failed amphora
        # This prevents a DAD failure when bringing up the new amphora.
        # Keepalived will handle this for act/stdby.
        must_admin_down_old_port = (
            role == constants.ROLE_STANDALONE and
            failed_amp_vrrp_port_id and
            is_vrrp_ipv6)
        if must_admin_down_old_port:
            admin_down_old_port = network_tasks.AdminDownPort(
                name=_prefixed(constants.ADMIN_DOWN_PORT),
                inject={constants.PORT_ID: failed_amp_vrrp_port_id})
            subflow.add(admin_down_old_port)

        post_vip_plug = amphora_driver_tasks.AmphoraPostVIPPlug(
            name=_prefixed(constants.AMPHORA_POST_VIP_PLUG),
            requires=(constants.AMPHORA, constants.LOADBALANCER,
                      constants.AMPHORAE_NETWORK_CONFIG))
        subflow.add(post_vip_plug)

        # Plug member ports
        calculate_delta = network_tasks.CalculateAmphoraDelta(
            name=_prefixed(constants.CALCULATE_AMPHORA_DELTA),
            requires=(constants.LOADBALANCER, constants.AMPHORA,
                      constants.AVAILABILITY_ZONE, constants.VRRP_PORT),
            rebind={constants.VRRP_PORT: constants.BASE_PORT},
            provides=constants.DELTA)
        subflow.add(calculate_delta)

        apply_delta = network_tasks.HandleNetworkDelta(
            name=_prefixed(constants.HANDLE_NETWORK_DELTA),
            requires=(constants.AMPHORA, constants.DELTA),
            provides=constants.ADDED_PORTS)
        subflow.add(apply_delta)

        post_network_plug = amphora_driver_tasks.AmphoraePostNetworkPlug(
            name=_prefixed(constants.AMPHORAE_POST_NETWORK_PLUG),
            requires=(constants.LOADBALANCER, constants.ADDED_PORTS))
        subflow.add(post_network_plug)

        return subflow