Example 1
    def cert_rotate_amphora_flow(self):
        """Implement rotation for amphora's cert.

         1. Create a new certificate
         2. Upload the cert to amphora
         3. update the newly created certificate info to amphora
         4. update the cert_busy flag to be false after rotation

        :returns: The flow for updating an amphora
        """
        rotated_amphora_flow = linear_flow.Flow(
            constants.CERT_ROTATE_AMPHORA_FLOW)

        rotated_amphora_flow.add(lifecycle_tasks.AmphoraToErrorOnRevertTask(
            requires=constants.AMPHORA))

        # Create a new certificate; the task provides the newly created
        # certificate as SERVER_PEM
        rotated_amphora_flow.add(cert_task.GenerateServerPEMTask(
            provides=constants.SERVER_PEM))

        # Upload the new certificate to the amphora
        rotated_amphora_flow.add(amphora_driver_tasks.AmphoraCertUpload(
            requires=(constants.AMPHORA, constants.SERVER_PEM)))

        # Update the certificate expiration info in the amphora DB record
        rotated_amphora_flow.add(database_tasks.UpdateAmphoraDBCertExpiration(
            requires=(constants.AMPHORA_ID, constants.SERVER_PEM)))

        # Set the cert_busy flag back to false after rotation
        rotated_amphora_flow.add(database_tasks.UpdateAmphoraCertBusyToFalse(
            requires=constants.AMPHORA_ID))

        return rotated_amphora_flow
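
For context, here is a minimal TaskFlow sketch, separate from the Octavia code above, of the requires/provides wiring this flow relies on: one task provides the new certificate under a well-known key, and a later task receives it, together with the caller-supplied amphora, as execute() arguments. The task names, flow name, and amphora value below are made up for illustration.

from taskflow import engines
from taskflow import task
from taskflow.patterns import linear_flow


class GenerateServerPEM(task.Task):
    # The value returned by execute() is stored under 'server_pem'.
    default_provides = 'server_pem'

    def execute(self):
        # Stand-in for real certificate generation.
        return '-----BEGIN CERTIFICATE-----...'


class UploadCert(task.Task):
    # execute() arguments are looked up by name in the flow's storage.
    def execute(self, amphora, server_pem):
        print('uploading %d bytes of cert to %s' % (len(server_pem), amphora))


flow = linear_flow.Flow('cert-rotate-sketch')
flow.add(GenerateServerPEM(), UploadCert())

# 'amphora' is seeded through the store; 'server_pem' flows from the first
# task into the second automatically.
engines.run(flow, store={'amphora': 'amphora-1234'})
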
Example 2
    def get_delete_amphora_flow(self):
        """Creates a flow to delete an amphora.

        This should be configurable in the config file.

        :returns: The flow for deleting the amphora
        :raises AmphoraNotFound: The referenced Amphora was not found
        """

        delete_amphora_flow = linear_flow.Flow(constants.DELETE_AMPHORA_FLOW)
        delete_amphora_flow.add(lifecycle_tasks.AmphoraToErrorOnRevertTask(
            requires=constants.AMPHORA))
        delete_amphora_flow.add(database_tasks.MarkAmphoraPendingDeleteInDB(
            requires=constants.AMPHORA))
        delete_amphora_flow.add(database_tasks.MarkAmphoraHealthBusy(
            requires=constants.AMPHORA))
        delete_amphora_flow.add(compute_tasks.ComputeDelete(
            requires=constants.AMPHORA))
        delete_amphora_flow.add(database_tasks.DisableAmphoraHealthMonitoring(
            requires=constants.AMPHORA))
        delete_amphora_flow.add(database_tasks.MarkAmphoraDeletedInDB(
            requires=constants.AMPHORA))
        return delete_amphora_flow
Example 3
    def get_delete_amphora_flow(
            self, amphora,
            retry_attempts=CONF.controller_worker.amphora_delete_retries,
            retry_interval=(
                CONF.controller_worker.amphora_delete_retry_interval)):
        """Creates a subflow to delete an amphora and it's port.

        This flow is idempotent and safe to retry.

        :param amphora: An amphora dict object.
        :param retry_attempts: The number of times the flow is retried.
        :param retry_interval: The time to wait, in seconds, between retries.
        :returns: The subflow for deleting the amphora.
        :raises AmphoraNotFound: The referenced Amphora was not found.
        """
        amphora_id = amphora[constants.ID]
        delete_amphora_flow = linear_flow.Flow(
            name=constants.DELETE_AMPHORA_FLOW + '-' + amphora_id,
            retry=retry_tasks.SleepingRetryTimesController(
                name='retry-' + constants.DELETE_AMPHORA_FLOW + '-' +
                     amphora_id,
                attempts=retry_attempts, interval=retry_interval))
        delete_amphora_flow.add(lifecycle_tasks.AmphoraToErrorOnRevertTask(
            name=constants.AMPHORA_TO_ERROR_ON_REVERT + '-' + amphora_id,
            inject={constants.AMPHORA: amphora}))
        delete_amphora_flow.add(
            database_tasks.MarkAmphoraPendingDeleteInDB(
                name=constants.MARK_AMPHORA_PENDING_DELETE + '-' + amphora_id,
                inject={constants.AMPHORA: amphora}))
        delete_amphora_flow.add(database_tasks.MarkAmphoraHealthBusy(
            name=constants.MARK_AMPHORA_HEALTH_BUSY + '-' + amphora_id,
            inject={constants.AMPHORA: amphora}))
        delete_amphora_flow.add(compute_tasks.ComputeDelete(
            name=constants.DELETE_AMPHORA + '-' + amphora_id,
            inject={constants.AMPHORA: amphora,
                    constants.PASSIVE_FAILURE: True}))
        delete_amphora_flow.add(database_tasks.DisableAmphoraHealthMonitoring(
            name=constants.DISABLE_AMP_HEALTH_MONITORING + '-' + amphora_id,
            inject={constants.AMPHORA: amphora}))
        delete_amphora_flow.add(database_tasks.MarkAmphoraDeletedInDB(
            name=constants.MARK_AMPHORA_DELETED + '-' + amphora_id,
            inject={constants.AMPHORA: amphora}))
        if amphora.get(constants.VRRP_PORT_ID):
            delete_amphora_flow.add(network_tasks.DeletePort(
                name=(constants.DELETE_PORT + '-' + str(amphora_id) + '-' +
                      str(amphora[constants.VRRP_PORT_ID])),
                inject={constants.PORT_ID: amphora[constants.VRRP_PORT_ID],
                        constants.PASSIVE_FAILURE: True}))
        # TODO(johnsom) What about cleaning up any member ports?
        # maybe we should get the list of attached ports prior to delete
        # and call delete on them here. Fix this as part of
        # https://storyboard.openstack.org/#!/story/2007077

        return delete_amphora_flow
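
As a rough sketch of the retry pattern in this example: attaching a retry controller to a linear flow makes the engine revert and re-run the flow when one of its tasks raises. TaskFlow's built-in retry.Times is used below in place of Octavia's SleepingRetryTimesController (which additionally sleeps between attempts); the task and resource names are made up.

from taskflow import engines
from taskflow import retry
from taskflow import task
from taskflow.patterns import linear_flow


class FlakyDelete(task.Task):
    """Fails twice, then succeeds, to exercise the retry controller."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.attempts = 0

    def execute(self, resource_id):
        self.attempts += 1
        if self.attempts < 3:
            raise RuntimeError('transient compute error')
        print('deleted %s on attempt %d' % (resource_id, self.attempts))


flow = linear_flow.Flow(
    'delete-with-retry',
    # Revert and re-run the flow up to 5 times before giving up.
    retry=retry.Times(attempts=5, name='retry-delete'))
flow.add(FlakyDelete(inject={'resource_id': 'amphora-1234'}))

engines.run(flow)
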
Example 4
    def update_amphora_config_flow(self):
        """Creates a flow to update the amphora agent configuration.

        :returns: The flow for updating an amphora
        """
        update_amphora_flow = linear_flow.Flow(
            constants.UPDATE_AMPHORA_CONFIG_FLOW)

        update_amphora_flow.add(lifecycle_tasks.AmphoraToErrorOnRevertTask(
            requires=constants.AMPHORA))

        update_amphora_flow.add(amphora_driver_tasks.AmphoraConfigUpdate(
            requires=(constants.AMPHORA, constants.FLAVOR)))

        return update_amphora_flow
Example 5
    def test_AmphoraToErrorOnRevertTask(self, mock_amp_status_error,
                                        mock_amp_health_busy):

        amp_to_error_on_revert = lifecycle_tasks.AmphoraToErrorOnRevertTask()

        # Execute
        amp_to_error_on_revert.execute(self.AMPHORA)

        self.assertFalse(mock_amp_status_error.called)

        # Revert
        amp_to_error_on_revert.revert(self.AMPHORA)

        mock_amp_status_error.assert_called_once_with(self.AMPHORA_ID)
        self.assertFalse(mock_amp_health_busy.called)
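
To make the test above easier to read, here is a hedged sketch of the general shape of a "to ERROR on revert" lifecycle task: execute() is a no-op on the forward path, and revert() marks the amphora ERROR only when a later task in the flow has failed. The repository object and its set_status() method are stand-ins, not Octavia's actual implementation.

from taskflow import task


class AmphoraToErrorOnRevertSketch(task.Task):

    def __init__(self, amphora_repo, **kwargs):
        super().__init__(**kwargs)
        self.amphora_repo = amphora_repo

    def execute(self, amphora):
        # Nothing to do on the forward path.
        pass

    def revert(self, amphora, *args, **kwargs):
        # Runs only if a later task in the flow failed; flag the amphora
        # so operators can see the failure.
        self.amphora_repo.set_status(amphora['id'], 'ERROR')
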
Example 6
    def get_failover_flow(self, role=constants.ROLE_STANDALONE,
                          load_balancer=None):
        """Creates a flow to failover a stale amphora

        :returns: The flow for amphora failover
        """

        failover_amphora_flow = linear_flow.Flow(
            constants.FAILOVER_AMPHORA_FLOW)

        failover_amphora_flow.add(lifecycle_tasks.AmphoraToErrorOnRevertTask(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))

        failover_amphora_flow.add(network_tasks.FailoverPreparationForAmphora(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))

        # Note: It seems intuitive to boot an amphora prior to deleting
        #       the old amphora, however this is a complicated issue.
        #       If the target host (due to anti-affinity) is resource
        #       constrained, this will fail where a post-delete will
        #       succeed. Since this is async with the API it would result
        #       in the LB ending in ERROR though the amps are still alive.
        #       Consider in the future making this a complicated
        #       try-on-failure-retry flow, or move upgrade failovers to be
        #       synchronous with the API. For now spares pool and act/stdby
        #       will mitigate most of this delay.

        # Delete the old amphora
        failover_amphora_flow.add(
            database_tasks.MarkAmphoraPendingDeleteInDB(
                rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
                requires=constants.AMPHORA))
        failover_amphora_flow.add(
            database_tasks.MarkAmphoraHealthBusy(
                rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
                requires=constants.AMPHORA))
        failover_amphora_flow.add(compute_tasks.ComputeDelete(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))
        failover_amphora_flow.add(network_tasks.WaitForPortDetach(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))
        failover_amphora_flow.add(database_tasks.MarkAmphoraDeletedInDB(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))

        # If this is an unallocated amp (spares pool), we're done
        if not load_balancer:
            failover_amphora_flow.add(
                database_tasks.DisableAmphoraHealthMonitoring(
                    rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
                    requires=constants.AMPHORA))
            return failover_amphora_flow

        # Save failed amphora details for later
        failover_amphora_flow.add(
            database_tasks.GetAmphoraDetails(
                rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
                requires=constants.AMPHORA,
                provides=constants.AMP_DATA))

        # Get a new amphora
        # Note: Role doesn't matter here.  We will update it later.
        get_amp_subflow = self.get_amphora_for_lb_subflow(
            prefix=constants.FAILOVER_AMPHORA_FLOW)
        failover_amphora_flow.add(get_amp_subflow)

        # Update the new amphora with the failed amphora details
        failover_amphora_flow.add(database_tasks.UpdateAmpFailoverDetails(
            requires=(constants.AMPHORA, constants.AMP_DATA)))

        # Update the data stored in the flow from the database
        failover_amphora_flow.add(database_tasks.ReloadLoadBalancer(
            requires=constants.LOADBALANCER_ID,
            provides=constants.LOADBALANCER))

        failover_amphora_flow.add(database_tasks.ReloadAmphora(
            requires=constants.AMPHORA,
            provides=constants.AMPHORA))

        # Prepare to reconnect the network interface(s)
        failover_amphora_flow.add(network_tasks.GetAmphoraeNetworkConfigs(
            requires=constants.LOADBALANCER,
            provides=constants.AMPHORAE_NETWORK_CONFIG))
        failover_amphora_flow.add(database_tasks.GetListenersFromLoadbalancer(
            requires=constants.LOADBALANCER, provides=constants.LISTENERS))
        failover_amphora_flow.add(database_tasks.GetAmphoraeFromLoadbalancer(
            requires=constants.LOADBALANCER, provides=constants.AMPHORAE))

        # Plug the VIP ports into the new amphora
        # These steps are done here because the UDP listeners need some
        # kernel configuration before the listener update, to avoid
        # failures while rebuilding the amphora.
        failover_amphora_flow.add(network_tasks.PlugVIPPort(
            requires=(constants.AMPHORA, constants.AMPHORAE_NETWORK_CONFIG)))
        failover_amphora_flow.add(amphora_driver_tasks.AmphoraPostVIPPlug(
            requires=(constants.AMPHORA, constants.LOADBALANCER,
                      constants.AMPHORAE_NETWORK_CONFIG)))

        # The listener update needs to run on all amphorae to update their
        # peer configurations, so parallelize this with an unordered
        # subflow.
        update_amps_subflow = unordered_flow.Flow(
            constants.UPDATE_AMPS_SUBFLOW)

        timeout_dict = {
            constants.CONN_MAX_RETRIES:
                CONF.haproxy_amphora.active_connection_max_retries,
            constants.CONN_RETRY_INTERVAL:
                CONF.haproxy_amphora.active_connection_rety_interval}

        # Set up parallel flows for each amp. We don't know the new amp
        # details at flow creation time, so set up a subflow for each amp
        # on the LB; each task indexes into a list of amps to find the
        # amphora it should work on.
        amp_index = 0
        db_lb = self.lb_repo.get(db_apis.get_session(),
                                 id=load_balancer[constants.LOADBALANCER_ID])
        for amp in db_lb.amphorae:
            if amp.status == constants.DELETED:
                continue
            update_amps_subflow.add(
                amphora_driver_tasks.AmpListenersUpdate(
                    name=constants.AMP_LISTENER_UPDATE + '-' + str(amp_index),
                    requires=(constants.LOADBALANCER, constants.AMPHORAE),
                    inject={constants.AMPHORA_INDEX: amp_index,
                            constants.TIMEOUT_DICT: timeout_dict}))
            amp_index += 1

        failover_amphora_flow.add(update_amps_subflow)

        # Plug the member networks into the new amphora
        failover_amphora_flow.add(network_tasks.CalculateAmphoraDelta(
            requires=(constants.LOADBALANCER, constants.AMPHORA),
            provides=constants.DELTA))

        failover_amphora_flow.add(network_tasks.HandleNetworkDelta(
            requires=(constants.AMPHORA, constants.DELTA),
            provides=constants.ADDED_PORTS))

        failover_amphora_flow.add(amphora_driver_tasks.AmphoraePostNetworkPlug(
            requires=(constants.LOADBALANCER, constants.ADDED_PORTS)))

        failover_amphora_flow.add(database_tasks.ReloadLoadBalancer(
            name='octavia-failover-LB-reload-2',
            requires=constants.LOADBALANCER_ID,
            provides=constants.LOADBALANCER))

        # Handle the amphora role and VRRP if necessary
        if role == constants.ROLE_MASTER:
            failover_amphora_flow.add(database_tasks.MarkAmphoraMasterInDB(
                name=constants.MARK_AMP_MASTER_INDB,
                requires=constants.AMPHORA))
            vrrp_subflow = self.get_vrrp_subflow(role)
            failover_amphora_flow.add(vrrp_subflow)
        elif role == constants.ROLE_BACKUP:
            failover_amphora_flow.add(database_tasks.MarkAmphoraBackupInDB(
                name=constants.MARK_AMP_BACKUP_INDB,
                requires=constants.AMPHORA))
            vrrp_subflow = self.get_vrrp_subflow(role)
            failover_amphora_flow.add(vrrp_subflow)
        elif role == constants.ROLE_STANDALONE:
            failover_amphora_flow.add(
                database_tasks.MarkAmphoraStandAloneInDB(
                    name=constants.MARK_AMP_STANDALONE_INDB,
                    requires=constants.AMPHORA))

        failover_amphora_flow.add(amphora_driver_tasks.ListenersStart(
            requires=(constants.LOADBALANCER, constants.AMPHORA)))
        failover_amphora_flow.add(
            database_tasks.DisableAmphoraHealthMonitoring(
                rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
                requires=constants.AMPHORA))

        return failover_amphora_flow
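
A minimal sketch, separate from the Octavia code above and with made-up names and values, of the rebind pattern this flow uses so heavily: each task declares a generic 'amphora' argument, and rebind points that argument at the 'failed_amphora' entry in the flow's storage, which the caller seeds through the store.

from taskflow import engines
from taskflow import task
from taskflow.patterns import linear_flow


class MarkDeleted(task.Task):
    def execute(self, amphora):
        print('marking %s deleted' % amphora)


flow = linear_flow.Flow('rebind-sketch')
# The task's 'amphora' argument is satisfied by the 'failed_amphora' key
# in the flow's storage.
flow.add(MarkDeleted(rebind={'amphora': 'failed_amphora'}))

engines.run(flow, store={'failed_amphora': 'amp-dead-1234'})
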
Example 7
    def get_failover_amphora_flow(self, failed_amphora, lb_amp_count):
        """Get a Taskflow flow to failover an amphora.

        1. Build a replacement amphora.
        2. Delete the old amphora.
        3. Update the amphorae listener configurations.
        4. Update the VRRP configurations if needed.

        :param failed_amphora: The amphora dict to failover.
        :param lb_amp_count: The number of amphora on this load balancer.
        :returns: The flow that will provide the failover.
        """
        failover_amp_flow = linear_flow.Flow(
            constants.FAILOVER_AMPHORA_FLOW)

        # Revert amphora to status ERROR if this flow goes wrong
        failover_amp_flow.add(lifecycle_tasks.AmphoraToErrorOnRevertTask(
            requires=constants.AMPHORA,
            inject={constants.AMPHORA: failed_amphora}))

        if failed_amphora[constants.ROLE] in (constants.ROLE_MASTER,
                                              constants.ROLE_BACKUP):
            amp_role = 'master_or_backup'
        elif failed_amphora[constants.ROLE] == constants.ROLE_STANDALONE:
            amp_role = 'standalone'
        else:
            amp_role = 'undefined'
        LOG.info("Performing failover for amphora: %s",
                 {"id": failed_amphora[constants.ID],
                  "load_balancer_id": failed_amphora.get(
                      constants.LOAD_BALANCER_ID),
                  "lb_network_ip": failed_amphora.get(constants.LB_NETWORK_IP),
                  "compute_id": failed_amphora.get(constants.COMPUTE_ID),
                  "role": amp_role})

        failover_amp_flow.add(database_tasks.MarkAmphoraPendingDeleteInDB(
            requires=constants.AMPHORA,
            inject={constants.AMPHORA: failed_amphora}))

        failover_amp_flow.add(database_tasks.MarkAmphoraHealthBusy(
            requires=constants.AMPHORA,
            inject={constants.AMPHORA: failed_amphora}))

        failover_amp_flow.add(network_tasks.GetVIPSecurityGroupID(
            requires=constants.LOADBALANCER_ID,
            provides=constants.VIP_SG_ID))

        is_vrrp_ipv6 = False
        if failed_amphora.get(constants.LOAD_BALANCER_ID):
            if failed_amphora.get(constants.VRRP_IP):
                is_vrrp_ipv6 = utils.is_ipv6(failed_amphora[constants.VRRP_IP])

            # Get a replacement amphora and plug all of the networking.
            #
            # Do this early as the compute services have been observed to be
            # unreliable. The community decided the chance that deleting first
            # would open resources for an instance is less likely than the
            # compute service failing to boot an instance for other reasons.

            # TODO(johnsom) Move this back out to run for spares after
            #               delete amphora API is available.
            failover_amp_flow.add(self.get_amphora_for_lb_failover_subflow(
                prefix=constants.FAILOVER_LOADBALANCER_FLOW,
                role=failed_amphora[constants.ROLE],
                failed_amp_vrrp_port_id=failed_amphora.get(
                    constants.VRRP_PORT_ID),
                is_vrrp_ipv6=is_vrrp_ipv6))

        failover_amp_flow.add(
            self.get_delete_amphora_flow(
                failed_amphora,
                retry_attempts=CONF.controller_worker.amphora_delete_retries,
                retry_interval=(
                    CONF.controller_worker.amphora_delete_retry_interval)))
        failover_amp_flow.add(
            database_tasks.DisableAmphoraHealthMonitoring(
                requires=constants.AMPHORA,
                inject={constants.AMPHORA: failed_amphora}))

        if not failed_amphora.get(constants.LOAD_BALANCER_ID):
            # This is an unallocated amphora (bogus); we are done.
            return failover_amp_flow

        failover_amp_flow.add(database_tasks.GetLoadBalancer(
            requires=constants.LOADBALANCER_ID,
            inject={constants.LOADBALANCER_ID:
                    failed_amphora[constants.LOAD_BALANCER_ID]},
            provides=constants.LOADBALANCER))

        failover_amp_flow.add(database_tasks.GetAmphoraeFromLoadbalancer(
            name=constants.GET_AMPHORAE_FROM_LB,
            requires=constants.LOADBALANCER_ID,
            inject={constants.LOADBALANCER_ID:
                    failed_amphora[constants.LOAD_BALANCER_ID]},
            provides=constants.AMPHORAE))

        # Set up timeouts for our requests to the amphorae
        timeout_dict = {
            constants.CONN_MAX_RETRIES:
                CONF.haproxy_amphora.active_connection_max_retries,
            constants.CONN_RETRY_INTERVAL:
                CONF.haproxy_amphora.active_connection_rety_interval}

        # The listener update needs to run on all amphorae to update their
        # peer configurations, so parallelize this with an unordered
        # subflow.
        update_amps_subflow = unordered_flow.Flow(
            constants.UPDATE_AMPS_SUBFLOW)

        for amp_index in range(0, lb_amp_count):
            update_amps_subflow.add(
                amphora_driver_tasks.AmphoraIndexListenerUpdate(
                    name=str(amp_index) + '-' + constants.AMP_LISTENER_UPDATE,
                    requires=(constants.LOADBALANCER, constants.AMPHORAE),
                    inject={constants.AMPHORA_INDEX: amp_index,
                            constants.TIMEOUT_DICT: timeout_dict}))

        failover_amp_flow.add(update_amps_subflow)

        # Configure and enable keepalived in the amphora
        if lb_amp_count == 2:
            failover_amp_flow.add(
                self.get_vrrp_subflow(constants.GET_VRRP_SUBFLOW,
                                      timeout_dict, create_vrrp_group=False))

        # Reload the listener. This needs to be done here because
        # it will create the required haproxy check scripts for
        # the VRRP deployed above.
        # A "U" or newer amphora-agent will remove the need for this
        # task here.
        # TODO(johnsom) Remove this in the "W" cycle
        reload_listener_subflow = unordered_flow.Flow(
            constants.AMPHORA_LISTENER_RELOAD_SUBFLOW)

        for amp_index in range(0, lb_amp_count):
            reload_listener_subflow.add(
                amphora_driver_tasks.AmphoraIndexListenersReload(
                    name=(str(amp_index) + '-' +
                          constants.AMPHORA_RELOAD_LISTENER),
                    requires=(constants.LOADBALANCER, constants.AMPHORAE),
                    inject={constants.AMPHORA_INDEX: amp_index,
                            constants.TIMEOUT_DICT: timeout_dict}))

        failover_amp_flow.add(reload_listener_subflow)

        # Remove any extraneous ports
        # Note: Nova sometimes fails to delete ports attached to an instance.
        #       For example, if you create an LB with a listener, then
        #       'openstack server delete' the amphora, you will see that the
        #       vrrp port attached to that instance remains after the
        #       instance is deleted.
        # TODO(johnsom) Fix this as part of
        #               https://storyboard.openstack.org/#!/story/2007077

        # Mark LB ACTIVE
        failover_amp_flow.add(
            database_tasks.MarkLBActiveInDB(mark_subobjects=True,
                                            requires=constants.LOADBALANCER))

        return failover_amp_flow
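
To illustrate the unordered subflow used for the per-amphora listener updates in this example, here is a small sketch with toy tasks and names (not Octavia's): each per-index task receives its index through inject, the tasks are grouped in an unordered_flow because they do not depend on each other, and a parallel engine may run them concurrently.

from taskflow import engines
from taskflow import task
from taskflow.patterns import linear_flow
from taskflow.patterns import unordered_flow


class UpdateListeners(task.Task):
    def execute(self, amphorae, amphora_index):
        print('updating listeners on %s' % amphorae[amphora_index])


amphorae = ['amp-a', 'amp-b']

update_subflow = unordered_flow.Flow('update-amps-sketch')
for index in range(len(amphorae)):
    update_subflow.add(UpdateListeners(
        name='amp-listener-update-%d' % index,
        inject={'amphora_index': index}))

outer = linear_flow.Flow('failover-sketch')
outer.add(update_subflow)

# A 'parallel' engine runs the unordered subflow's tasks concurrently.
engines.run(outer, engine='parallel', store={'amphorae': amphorae})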