예제 #1
0
 def get_vrrp_subflow(self, prefix):
     """Build the linear VRRP subflow for a load balancer.

     The subflow is named ``prefix + '-' + constants.GET_VRRP_SUBFLOW``
     and every task name is that subflow name followed by ``'-'`` and a
     task-specific constant. Tasks are added, in order:

     1. network_tasks.GetAmphoraeNetworkConfigs
     2. amphora_driver_tasks.AmphoraUpdateVRRPInterface
     3. database_tasks.CreateVRRPGroupForLB
     4. amphora_driver_tasks.AmphoraVRRPUpdate
     5. amphora_driver_tasks.AmphoraVRRPStart

     :param prefix: String prepended to the subflow and task names.
     :returns: The populated linear flow.
     """
     flow_name = prefix + '-' + constants.GET_VRRP_SUBFLOW
     flow = linear_flow.Flow(flow_name)

     def task_name(suffix):
         # Every task name shares the subflow name as its prefix.
         return flow_name + '-' + suffix

     # Order matters: this is a linear flow, so tasks run as listed.
     ordered_tasks = (
         network_tasks.GetAmphoraeNetworkConfigs(
             name=task_name(constants.GET_AMP_NETWORK_CONFIG),
             requires=constants.LOADBALANCER,
             provides=constants.AMPHORAE_NETWORK_CONFIG),
         # This task and the next both consume and re-provide
         # constants.LOADBALANCER.
         amphora_driver_tasks.AmphoraUpdateVRRPInterface(
             name=task_name(constants.AMP_UPDATE_VRRP_INTF),
             requires=constants.LOADBALANCER,
             provides=constants.LOADBALANCER),
         database_tasks.CreateVRRPGroupForLB(
             name=task_name(constants.CREATE_VRRP_GROUP_FOR_LB),
             requires=constants.LOADBALANCER,
             provides=constants.LOADBALANCER),
         amphora_driver_tasks.AmphoraVRRPUpdate(
             name=task_name(constants.AMP_VRRP_UPDATE),
             requires=(constants.LOADBALANCER,
                       constants.AMPHORAE_NETWORK_CONFIG)),
         amphora_driver_tasks.AmphoraVRRPStart(
             name=task_name(constants.AMP_VRRP_START),
             requires=constants.LOADBALANCER),
     )
     for task in ordered_tasks:
         flow.add(task)

     return flow
예제 #2
0
    def get_failover_LB_flow(self, amps, lb):
        """Failover a load balancer.

        1. Validate the VIP port is correct and present.
        2. Build a replacement amphora.
        3. Delete the failed amphora.
        4. Configure the replacement amphora listeners.
        5. Configure VRRP for the listeners.
        6. Build the second replacement amphora.
        7. Delete the second failed amphora.
        8. Delete any extraneous amphora.
        9. Configure the listeners on the new amphorae.
        10. Configure the VRRP on the new amphorae.
        11. Reload the listener configurations to pick up VRRP changes.
        12. Mark the load balancer back to ACTIVE.

        Steps 5-7 and 9-11 are only added when ``lb.topology`` is
        ``constants.TOPOLOGY_ACTIVE_STANDBY``.

        Note that this method only *builds* the flow; the decisions based
        on ``amps`` and ``lb`` are made here, at flow construction time.

        :param amps: Collection of the amphora objects currently on the
                     load balancer; may be empty. It is consumed in
                     place: up to two entries are removed with ``pop()``
                     (one, or two for active/standby) and whatever
                     remains is scheduled for deletion as extraneous.
                     The caller's object is therefore mutated.
        :param lb: The load balancer object; its ``id`` and ``topology``
                   attributes are read.
        :returns: The flow that will provide the failover.
        """
        # Pick one amphora to be failed over if any exist.
        failed_amp = None
        if amps:
            failed_amp = amps.pop()

        failover_LB_flow = linear_flow.Flow(
            constants.FAILOVER_LOADBALANCER_FLOW)

        # Revert LB to provisioning_status ERROR if this flow goes wrong
        failover_LB_flow.add(
            lifecycle_tasks.LoadBalancerToErrorOnRevertTask(
                requires=constants.LOADBALANCER))

        # Setup timeouts for our requests to the amphorae
        # NOTE(review): the interval option is spelled "rety" here; confirm
        # it matches the registered config option name before "fixing" it.
        timeout_dict = {
            constants.CONN_MAX_RETRIES:
            CONF.haproxy_amphora.active_connection_max_retries,
            constants.CONN_RETRY_INTERVAL:
            CONF.haproxy_amphora.active_connection_rety_interval
        }

        if failed_amp:
            # Collapse the amphora role into a coarse label that is used
            # only in the log message below.
            if failed_amp.role in (constants.ROLE_MASTER,
                                   constants.ROLE_BACKUP):
                amp_role = 'master_or_backup'
            elif failed_amp.role == constants.ROLE_STANDALONE:
                amp_role = 'standalone'
            elif failed_amp.role is None:
                amp_role = 'spare'
            else:
                amp_role = 'undefined'
            LOG.info(
                "Performing failover for amphora: %s", {
                    "id": failed_amp.id,
                    "load_balancer_id": lb.id,
                    "lb_network_ip": failed_amp.lb_network_ip,
                    "compute_id": failed_amp.compute_id,
                    "role": amp_role
                })

            # The failed amphora is supplied to these tasks via ``inject``
            # rather than being read from the flow storage.
            failover_LB_flow.add(
                database_tasks.MarkAmphoraPendingDeleteInDB(
                    requires=constants.AMPHORA,
                    inject={constants.AMPHORA: failed_amp}))

            failover_LB_flow.add(
                database_tasks.MarkAmphoraHealthBusy(
                    requires=constants.AMPHORA,
                    inject={constants.AMPHORA: failed_amp}))

        # Check that the VIP port exists and is ok
        failover_LB_flow.add(
            network_tasks.AllocateVIP(requires=constants.LOADBALANCER,
                                      provides=constants.VIP))

        # Update the database with the VIP information
        failover_LB_flow.add(
            database_tasks.UpdateVIPAfterAllocation(
                requires=(constants.LOADBALANCER_ID, constants.VIP),
                provides=constants.LOADBALANCER))

        # Make sure the SG has the correct rules and re-apply to the
        # VIP port. It is not used on the VIP port, but will help lock
        # the SG as in use.
        failover_LB_flow.add(
            network_tasks.UpdateVIPSecurityGroup(
                requires=constants.LOADBALANCER_ID,
                provides=constants.VIP_SG_ID))

        # The first replacement amphora is built as STANDALONE, or as
        # BACKUP when the load balancer is active/standby.
        new_amp_role = constants.ROLE_STANDALONE
        if lb.topology == constants.TOPOLOGY_ACTIVE_STANDBY:
            new_amp_role = constants.ROLE_BACKUP

        # Get a replacement amphora and plug all of the networking.
        #
        # Do this early as the compute services have been observed to be
        # unreliable. The community decided the chance that deleting first
        # would open resources for an instance is less likely than the compute
        # service failing to boot an instance for other reasons.
        if failed_amp:
            # Pass along the failed amphora's VRRP port ID and whether its
            # VRRP address is IPv6 (False when it has no vrrp_ip).
            failed_vrrp_is_ipv6 = False
            if failed_amp.vrrp_ip:
                failed_vrrp_is_ipv6 = utils.is_ipv6(failed_amp.vrrp_ip)
            failover_LB_flow.add(
                self.amp_flows.get_amphora_for_lb_failover_subflow(
                    prefix=constants.FAILOVER_LOADBALANCER_FLOW,
                    role=new_amp_role,
                    failed_amp_vrrp_port_id=failed_amp.vrrp_port_id,
                    is_vrrp_ipv6=failed_vrrp_is_ipv6))
        else:
            # No existing amphora to replace: build one without any
            # failed-amphora details.
            failover_LB_flow.add(
                self.amp_flows.get_amphora_for_lb_failover_subflow(
                    prefix=constants.FAILOVER_LOADBALANCER_FLOW,
                    role=new_amp_role))

        if lb.topology == constants.TOPOLOGY_ACTIVE_STANDBY:
            failover_LB_flow.add(
                database_tasks.MarkAmphoraBackupInDB(
                    name=constants.MARK_AMP_BACKUP_INDB,
                    requires=constants.AMPHORA))

        # Delete the failed amp
        if failed_amp:
            failover_LB_flow.add(
                self.amp_flows.get_delete_amphora_flow(failed_amp))

        # Update the data stored in the flow from the database
        failover_LB_flow.add(
            database_tasks.ReloadLoadBalancer(
                requires=constants.LOADBALANCER_ID,
                provides=constants.LOADBALANCER))

        # Configure the listener(s)
        # We will run update on this amphora again later if this is
        # an active/standby load balancer because we want this amp
        # functional as soon as possible. It must run again to update
        # the configurations for the new peers.
        failover_LB_flow.add(
            amphora_driver_tasks.AmpListenersUpdate(
                name=constants.AMP_LISTENER_UPDATE,
                requires=(constants.LOADBALANCER, constants.AMPHORA),
                inject={constants.TIMEOUT_DICT: timeout_dict}))

        # Bring up the new "backup" amphora VIP now to reduce the outage
        # on the final failover. This dropped the outage from 8-9 seconds
        # to less than one in my lab.
        # This does mean some steps have to be repeated later to reconfigure
        # for the second amphora as a peer.
        if lb.topology == constants.TOPOLOGY_ACTIVE_STANDBY:

            # Task names in this section are prefixed with new_amp_role,
            # which is still ROLE_BACKUP at this point.
            failover_LB_flow.add(
                database_tasks.CreateVRRPGroupForLB(
                    name=new_amp_role + '-' +
                    constants.CREATE_VRRP_GROUP_FOR_LB,
                    requires=constants.LOADBALANCER_ID))

            failover_LB_flow.add(
                network_tasks.GetAmphoraNetworkConfigsByID(
                    name=(new_amp_role + '-' +
                          constants.GET_AMPHORA_NETWORK_CONFIGS_BY_ID),
                    requires=(constants.LOADBALANCER_ID, constants.AMPHORA_ID),
                    provides=constants.FIRST_AMP_NETWORK_CONFIGS))

            failover_LB_flow.add(
                amphora_driver_tasks.AmphoraUpdateVRRPInterface(
                    name=new_amp_role + '-' + constants.AMP_UPDATE_VRRP_INTF,
                    requires=constants.AMPHORA,
                    inject={constants.TIMEOUT_DICT: timeout_dict},
                    provides=constants.FIRST_AMP_VRRP_INTERFACE))

            # The results for the first amphora were stored under the
            # FIRST_AMP_* names above; rebind them onto the argument names
            # this task takes.
            failover_LB_flow.add(
                amphora_driver_tasks.AmphoraVRRPUpdate(
                    name=new_amp_role + '-' + constants.AMP_VRRP_UPDATE,
                    requires=(constants.LOADBALANCER_ID, constants.AMPHORA),
                    rebind={
                        constants.AMPHORAE_NETWORK_CONFIG:
                        constants.FIRST_AMP_NETWORK_CONFIGS,
                        constants.AMP_VRRP_INT:
                        constants.FIRST_AMP_VRRP_INTERFACE
                    },
                    inject={constants.TIMEOUT_DICT: timeout_dict}))

            failover_LB_flow.add(
                amphora_driver_tasks.AmphoraVRRPStart(
                    name=new_amp_role + '-' + constants.AMP_VRRP_START,
                    requires=constants.AMPHORA,
                    inject={constants.TIMEOUT_DICT: timeout_dict}))

            # Start the listener. This needs to be done here because
            # it will create the required haproxy check scripts for
            # the VRRP deployed above.
            # A "V" or newer amphora-agent will remove the need for this
            # task here.
            # TODO(johnsom) Remove this in the "X" cycle
            failover_LB_flow.add(
                amphora_driver_tasks.ListenersStart(
                    name=new_amp_role + '-' + constants.AMP_LISTENER_START,
                    requires=(constants.LOADBALANCER, constants.AMPHORA)))

            #  #### Work on standby amphora if needed #####

            # From here on new_amp_role is ROLE_MASTER, so every later
            # new_amp_role-prefixed task name uses the master role string.
            new_amp_role = constants.ROLE_MASTER
            # Take a second amphora (if any remain) to fail over.
            failed_amp = None
            if amps:
                failed_amp = amps.pop()

            if failed_amp:
                # Same coarse, log-only role label as for the first amphora.
                if failed_amp.role in (constants.ROLE_MASTER,
                                       constants.ROLE_BACKUP):
                    amp_role = 'master_or_backup'
                elif failed_amp.role == constants.ROLE_STANDALONE:
                    amp_role = 'standalone'
                elif failed_amp.role is None:
                    amp_role = 'spare'
                else:
                    amp_role = 'undefined'
                LOG.info(
                    "Performing failover for amphora: %s", {
                        "id": failed_amp.id,
                        "load_balancer_id": lb.id,
                        "lb_network_ip": failed_amp.lb_network_ip,
                        "compute_id": failed_amp.compute_id,
                        "role": amp_role
                    })

                # Unlike the first pass, these tasks get explicit names.
                failover_LB_flow.add(
                    database_tasks.MarkAmphoraPendingDeleteInDB(
                        name=(new_amp_role + '-' +
                              constants.MARK_AMPHORA_PENDING_DELETE),
                        requires=constants.AMPHORA,
                        inject={constants.AMPHORA: failed_amp}))

                failover_LB_flow.add(
                    database_tasks.MarkAmphoraHealthBusy(
                        name=(new_amp_role + '-' +
                              constants.MARK_AMPHORA_HEALTH_BUSY),
                        requires=constants.AMPHORA,
                        inject={constants.AMPHORA: failed_amp}))

            # Get a replacement amphora and plug all of the networking.
            #
            # Do this early as the compute services have been observed to be
            # unreliable. The community decided the chance that deleting first
            # would open resources for an instance is less likely than the
            # compute service failing to boot an instance for other reasons.
            #
            # This subflow is added whether or not a second failed amphora
            # was found, and no failed_amp_vrrp_port_id / is_vrrp_ipv6 is
            # passed for it.
            failover_LB_flow.add(
                self.amp_flows.get_amphora_for_lb_failover_subflow(
                    prefix=(new_amp_role + '-' +
                            constants.FAILOVER_LOADBALANCER_FLOW),
                    role=new_amp_role))

            failover_LB_flow.add(
                database_tasks.MarkAmphoraMasterInDB(
                    name=constants.MARK_AMP_MASTER_INDB,
                    requires=constants.AMPHORA))

            # Delete the failed amp
            if failed_amp:
                failover_LB_flow.add(
                    self.amp_flows.get_delete_amphora_flow(failed_amp))
                # NOTE(review): health monitoring is disabled only for this
                # second failed amphora, not for the first one above --
                # confirm that asymmetry is intended.
                failover_LB_flow.add(
                    database_tasks.DisableAmphoraHealthMonitoring(
                        name=(new_amp_role + '-' +
                              constants.DISABLE_AMP_HEALTH_MONITORING),
                        requires=constants.AMPHORA,
                        inject={constants.AMPHORA: failed_amp}))

        # Remove any extraneous amphora
        # Note: This runs in all topology situations.
        #       It should run before the act/stdby final listener update so
        #       that we don't bother attempting to update dead amphorae.
        # ``amps`` now holds only the entries that were not popped above.
        # The subflow is added even when it ends up empty.
        delete_extra_amps_flow = unordered_flow.Flow(
            constants.DELETE_EXTRA_AMPHORAE_FLOW)
        for amp in amps:
            LOG.debug(
                'Found extraneous amphora %s on load balancer %s. '
                'Deleting.', amp.id, lb.id)
            delete_extra_amps_flow.add(
                self.amp_flows.get_delete_amphora_flow(amp))

        failover_LB_flow.add(delete_extra_amps_flow)

        if lb.topology == constants.TOPOLOGY_ACTIVE_STANDBY:
            # Update the data stored in the flow from the database
            failover_LB_flow.add(
                database_tasks.ReloadLoadBalancer(
                    name=new_amp_role + '-' +
                    constants.RELOAD_LB_AFTER_AMP_ASSOC,
                    requires=constants.LOADBALANCER_ID,
                    provides=constants.LOADBALANCER))

            failover_LB_flow.add(
                database_tasks.GetAmphoraeFromLoadbalancer(
                    name=new_amp_role + '-' + constants.GET_AMPHORAE_FROM_LB,
                    requires=constants.LOADBALANCER_ID,
                    provides=constants.AMPHORAE))

            # Listeners update needs to be run on all amphora to update
            # their peer configurations. So parallelize this with an
            # unordered subflow.
            update_amps_subflow = unordered_flow.Flow(
                constants.UPDATE_AMPS_SUBFLOW)

            # Setup parallel flows for each amp. We don't know the new amp
            # details at flow creation time, so setup a subflow for each
            # amp on the LB, then let the task index into a list of amps
            # to find the amphora it should work on.
            # Exactly two indexes (0 and 1) are hard-coded here.
            update_amps_subflow.add(
                amphora_driver_tasks.AmphoraIndexListenerUpdate(
                    name=(constants.AMPHORA + '-0-' +
                          constants.AMP_LISTENER_UPDATE),
                    requires=(constants.LOADBALANCER, constants.AMPHORAE),
                    inject={
                        constants.AMPHORA_INDEX: 0,
                        constants.TIMEOUT_DICT: timeout_dict
                    }))
            update_amps_subflow.add(
                amphora_driver_tasks.AmphoraIndexListenerUpdate(
                    name=(constants.AMPHORA + '-1-' +
                          constants.AMP_LISTENER_UPDATE),
                    requires=(constants.LOADBALANCER, constants.AMPHORAE),
                    inject={
                        constants.AMPHORA_INDEX: 1,
                        constants.TIMEOUT_DICT: timeout_dict
                    }))

            failover_LB_flow.add(update_amps_subflow)

            # Configure and enable keepalived in the amphora
            # create_vrrp_group=False is passed because a
            # CreateVRRPGroupForLB task was already added to this flow above.
            failover_LB_flow.add(
                self.amp_flows.get_vrrp_subflow(new_amp_role + '-' +
                                                constants.GET_VRRP_SUBFLOW,
                                                timeout_dict,
                                                create_vrrp_group=False))

            # #### End of standby ####

            # Reload the listener. This needs to be done here because
            # it will create the required haproxy check scripts for
            # the VRRP deployed above.
            # A "V" or newer amphora-agent will remove the need for this
            # task here.
            # TODO(johnsom) Remove this in the "X" cycle
            # Only AMPHORA_INDEX 1 is reloaded here.
            failover_LB_flow.add(
                amphora_driver_tasks.AmphoraIndexListenersReload(
                    name=(new_amp_role + '-' +
                          constants.AMPHORA_RELOAD_LISTENER),
                    requires=(constants.LOADBALANCER, constants.AMPHORAE),
                    inject={
                        constants.AMPHORA_INDEX: 1,
                        constants.TIMEOUT_DICT: timeout_dict
                    }))

        # Remove any extraneous ports
        # Note: Nova sometimes fails to delete ports attached to an instance.
        #       For example, if you create an LB with a listener, then
        #       'openstack server delete' the amphora, you will see the vrrp
        #       port attached to that instance will remain after the instance
        #       is deleted.
        # TODO(johnsom) Fix this as part of
        #               https://storyboard.openstack.org/#!/story/2007077

        # Mark LB ACTIVE
        failover_LB_flow.add(
            database_tasks.MarkLBActiveInDB(mark_subobjects=True,
                                            requires=constants.LOADBALANCER))

        return failover_LB_flow
예제 #3
0
    def get_vrrp_subflow(self,
                         prefix,
                         timeout_dict=None,
                         create_vrrp_group=True):
        """Build the VRRP subflow that covers amphora indexes 0 and 1.

        The returned linear flow is named
        ``prefix + '-' + constants.GET_VRRP_SUBFLOW`` and contains, in
        order:

        1. database_tasks.CreateVRRPGroupForLB (only when
           ``create_vrrp_group`` is true),
        2. network_tasks.GetAmphoraeNetworkConfigs,
        3. an unordered subflow holding one linear subflow per amphora
           index (0 and 1); each of those adds
           AmphoraIndexUpdateVRRPInterface, AmphoraIndexVRRPUpdate and
           AmphoraIndexVRRPStart, in that order.

        :param prefix: String prepended to the subflow and task names.
        :param timeout_dict: Value injected into each per-amphora task
                             under constants.TIMEOUT_DICT (may be None).
        :param create_vrrp_group: When False, the CreateVRRPGroupForLB
                                  task is left out.
        :returns: The populated linear flow.
        """
        flow_name = prefix + '-' + constants.GET_VRRP_SUBFLOW
        outer_flow = linear_flow.Flow(flow_name)

        # Optimization for failover flow. No reason to call this
        # when configuring the secondary amphora.
        if create_vrrp_group:
            create_group_task = database_tasks.CreateVRRPGroupForLB(
                name=flow_name + '-' + constants.CREATE_VRRP_GROUP_FOR_LB,
                requires=constants.LOADBALANCER_ID)
            outer_flow.add(create_group_task)

        network_configs_task = network_tasks.GetAmphoraeNetworkConfigs(
            name=flow_name + '-' + constants.GET_AMP_NETWORK_CONFIG,
            requires=constants.LOADBALANCER_ID,
            provides=constants.AMPHORAE_NETWORK_CONFIG)
        outer_flow.add(network_configs_task)

        # VRRP update needs to be run on all amphora to update
        # their peer configurations. So parallelize this with an
        # unordered subflow.
        parallel_flow = unordered_flow.Flow('VRRP-update-subflow')

        # (amphora index, per-amphora flow name, task-name infix)
        per_amp_layout = (
            (0, 'VRRP-amp-0-update-subflow', '-0-'),
            (1, 'VRRP-amp-1-update-subflow', '-1-'),
        )

        per_amp_flows = []
        for amp_index, amp_flow_name, infix in per_amp_layout:

            def injected(index=amp_index):
                # Each task receives its own freshly built inject dict.
                return {
                    constants.AMPHORA_INDEX: index,
                    constants.TIMEOUT_DICT: timeout_dict
                }

            # We have three tasks to run in order, per amphora
            amp_flow = linear_flow.Flow(amp_flow_name)

            amp_flow.add(
                amphora_driver_tasks.AmphoraIndexUpdateVRRPInterface(
                    name=flow_name + infix + constants.AMP_UPDATE_VRRP_INTF,
                    requires=constants.AMPHORAE,
                    inject=injected(),
                    provides=constants.AMP_VRRP_INT))

            amp_flow.add(
                amphora_driver_tasks.AmphoraIndexVRRPUpdate(
                    name=flow_name + infix + constants.AMP_VRRP_UPDATE,
                    requires=(constants.LOADBALANCER_ID,
                              constants.AMPHORAE_NETWORK_CONFIG,
                              constants.AMPHORAE, constants.AMP_VRRP_INT),
                    inject=injected()))

            amp_flow.add(
                amphora_driver_tasks.AmphoraIndexVRRPStart(
                    name=flow_name + infix + constants.AMP_VRRP_START,
                    requires=constants.AMPHORAE,
                    inject=injected()))

            per_amp_flows.append(amp_flow)

        # Attach the per-amphora flows only once all of them are built.
        for amp_flow in per_amp_flows:
            parallel_flow.add(amp_flow)

        outer_flow.add(parallel_flow)

        return outer_flow