Example #1
0
        def _promote_to_replica_source(old_master, master_candidate,
                                       replica_models):
            """Make ``master_candidate`` the replica source.

            The old master is made read-only, the candidate waits for the
            old master's latest transaction, is detached from it and
            enabled as master. The old master is then attached as a
            replica of the candidate and the public IPs of the two
            instances are swapped. Afterwards each remaining replica is
            moved over to the candidate and the old master is demoted.

            :param old_master: instance model of the current source.
            :param master_candidate: instance model being promoted.
            :param replica_models: list of replica instance models; the
                entry whose id matches ``master_candidate.id`` is only
                waited on, not re-attached.
            :raises ReplicationSlaveAttachError: when migrating a replica
                raised ``exception.TroveError`` or demoting the old master
                raised; the affected instances are first put into
                ``InstanceTasks.PROMOTION_ERROR``.
            """
            # First, we transition from the old master to new as quickly as
            # possible to minimize the scope of unrecoverable error
            old_master.make_read_only(True)
            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            latest_txn_id = old_master.get_latest_txn_id()
            master_candidate.wait_for_txn(latest_txn_id)
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            old_master.attach_replica(master_candidate)
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(slave_ips)

            # At this point, should something go wrong, there
            # should be a working master with some number of working slaves,
            # and possibly some number of "orphaned" slaves

            # Holds instance models (never bare ids): the list is handed to
            # _set_task_status below, which is also given models elsewhere.
            exception_replicas = []
            for replica in replica_models:
                try:
                    replica.wait_for_txn(latest_txn_id)
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError:
                    msg = _("promote-to-replica-source: Unable to migrate "
                            "replica %(slave)s from old replica source "
                            "%(old_master)s to new source %(new_master)s.")
                    msg_values = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id
                    }
                    LOG.exception(msg % msg_values)
                    exception_replicas.append(replica)

            try:
                old_master.demote_replication_master()
            except Exception:
                # Deliberately broad: a failed demotion must not prevent
                # the task statuses below from being updated.
                LOG.exception(_("Exception demoting old replica source"))
                exception_replicas.append(old_master)

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.PROMOTION_ERROR)
                msg = _("promote-to-replica-source %(id)s: The following "
                        "replicas may not have been switched: %(replicas)s")
                msg_values = {
                    "id": master_candidate.id,
                    # Report ids, not model reprs, in the error text.
                    "replicas": [repl.id for repl in exception_replicas]
                }
                raise ReplicationSlaveAttachError(msg % msg_values)
Example #2
0
        def _eject_replica_source(old_master, replica_models):
            """Replace ``old_master`` with one of its replicas.

            The new source is whatever ``self._most_current_replica``
            returns. It is detached from the old master, enabled as master,
            made writable and given the old master's public IPs; the old
            master receives the candidate's former IPs. Every other
            replica is then detached from the old master and attached to
            the new one.

            :param old_master: instance model of the source being ejected.
            :param replica_models: list of replica instance models.
            :raises ReplicationSlaveAttachError: when migrating a replica
                raised ``exception.TroveError``; the affected replicas are
                first put into ``InstanceTasks.EJECTION_ERROR``.
            """

            master_candidate = self._most_current_replica(
                old_master, replica_models)

            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(slave_ips)

            # Holds instance models (never bare ids): the list is handed to
            # _set_task_status below, which is also given models elsewhere.
            exception_replicas = []
            for replica in replica_models:
                try:
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError:
                    msg = _("eject-replica-source: Unable to migrate "
                            "replica %(slave)s from old replica source "
                            "%(old_master)s to new source %(new_master)s.")
                    msg_values = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id
                    }
                    LOG.exception(msg % msg_values)
                    exception_replicas.append(replica)

            if master_candidate.post_processing_required_for_replication():
                # Hand the replication details of every other replica to
                # the new master so it can finish its own setup.
                new_slaves = list(replica_models)
                new_slaves.remove(master_candidate)
                new_slaves_detail = [
                    slave.get_replication_detail() for slave in new_slaves
                ]
                master_candidate.complete_master_setup(new_slaves_detail)

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.EJECTION_ERROR)
                msg = _("eject-replica-source %(id)s: The following "
                        "replicas may not have been switched: %(replicas)s")
                msg_values = {
                    "id": master_candidate.id,
                    # Report ids, not model reprs, in the error text.
                    "replicas": [repl.id for repl in exception_replicas]
                }
                raise ReplicationSlaveAttachError(msg % msg_values)
Example #3
0
        def _eject_replica_source(old_master, replica_models):
            """Swap ``old_master`` out for one of its replicas.

            ``self._most_current_replica`` picks the new source. The
            public IPs of the two instances are exchanged, the new source
            is detached from the old one, enabled as master and made
            writable, and all remaining replicas are re-pointed at it.

            :param old_master: instance model of the source being ejected.
            :param replica_models: list of replica instance models.
            :raises ReplicationSlaveAttachError: if re-pointing any replica
                raised ``exception.TroveError``. Those replicas are set to
                ``InstanceTasks.EJECTION_ERROR`` before raising.
            """
            master_candidate = self._most_current_replica(
                old_master, replica_models)

            # Bring the candidate up as a writable master holding the old
            # master's public IPs; the old master gets the candidate's.
            old_master_ips = old_master.detach_public_ips()
            candidate_ips = master_candidate.detach_public_ips()
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            master_candidate.attach_public_ips(old_master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(candidate_ips)

            failed = []          # replica models that could not be moved
            failure_lines = []   # one "<message> (<exception>)\n" per failure
            for replica in replica_models:
                try:
                    if replica.id == master_candidate.id:
                        continue
                    replica.detach_replica(old_master, for_failover=True)
                    replica.attach_replica(master_candidate)
                except exception.TroveError as ex:
                    # Untranslated text goes to the log, translated text
                    # into the exception raised at the end.
                    translated_fmt = _(
                        "Unable to migrate replica %(slave)s from "
                        "old replica source %(old_master)s to "
                        "new source %(new_master)s on eject.")
                    details = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id
                    }
                    LOG.exception(
                        "Unable to migrate replica %(slave)s from "
                        "old replica source %(old_master)s to "
                        "new source %(new_master)s on eject.",
                        details)
                    failed.append(replica)
                    failure_lines.append(
                        "%s (%s)\n" % (translated_fmt % details, ex))

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if not failed:
                return

            self._set_task_status(failed, InstanceTasks.EJECTION_ERROR)
            summary_fmt = _(
                "eject-replica-source %(id)s: The following "
                "replicas may not have been switched: %(replicas)s:"
                "\n%(err)s")
            summary = summary_fmt % {
                "id": master_candidate.id,
                "replicas": [repl.id for repl in failed],
                "err": "".join(failure_lines)
            }
            raise ReplicationSlaveAttachError(summary)
Example #4
0
        def _promote_to_replica_source(old_master, master_candidate,
                                       replica_models):
            """Promote ``master_candidate`` over ``old_master``.

            The steps run in this order:

            1. make the old master read only (and detach floating ips)
            2. make sure the new master is up-to-date
            3. detach the new master from the old one
            4. enable the new master (and attach floating ips)
            5. attach the other replicas to the new master
            6. attach the old master to the new one
               (and attach floating ips)
            7. demote the old master

            :param old_master: instance model of the current source.
            :param master_candidate: instance model being promoted.
            :param replica_models: list of replica instance models; the
                entry whose id matches ``master_candidate.id`` is skipped.
            :raises ReplicationSlaveAttachError: if moving a replica raised
                ``exception.TroveError`` or demoting the old master raised.
                The affected instances are set to
                ``InstanceTasks.PROMOTION_ERROR`` before raising.
            """
            # NOTE(zhaochao): step 6 must not happen right after step 4.
            # With MariaDB the other replicas are still connected to the
            # old master at that point; re-attaching the old master as a
            # slave may create new GTIDs that get synced to them. Attaching
            # those replicas to the new master afterwards makes
            # 'START SLAVE' fail with 'fatal error 1236' once their binlog
            # has diverged from the new master's.

            # Switch over as quickly as possible to keep the window for
            # unrecoverable errors small.
            old_master.make_read_only(True)
            old_master_ips = old_master.detach_public_ips()
            candidate_ips = master_candidate.detach_public_ips()
            last_txn = old_master.get_latest_txn_id()
            master_candidate.wait_for_txn(last_txn)
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            master_candidate.attach_public_ips(old_master_ips)
            master_candidate.make_read_only(False)

            # From here on a failure should still leave a working master
            # with some working slaves, plus possibly "orphaned" slaves.

            failed = []          # instance models that ended up in error
            failure_lines = []   # one "<message> (<exception>)\n" per failure
            for replica in replica_models:
                try:
                    if replica.id == master_candidate.id:
                        continue
                    replica.detach_replica(old_master, for_failover=True)
                    replica.attach_replica(master_candidate)
                except exception.TroveError as ex:
                    # Untranslated text goes to the log, translated text
                    # into the exception raised at the end.
                    translated_fmt = _(
                        "Unable to migrate replica %(slave)s from "
                        "old replica source %(old_master)s to "
                        "new source %(new_master)s on promote.")
                    details = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id}
                    LOG.exception(
                        "Unable to migrate replica %(slave)s from "
                        "old replica source %(old_master)s to "
                        "new source %(new_master)s on promote.",
                        details)
                    failed.append(replica)
                    failure_lines.append(
                        "%s (%s)\n" % (translated_fmt % details, ex))

            # The old master is handled only once every other replica has
            # been processed.
            old_master.attach_replica(master_candidate)
            old_master.attach_public_ips(candidate_ips)
            try:
                old_master.demote_replication_master()
            except Exception as ex:
                translated_fmt = _(
                    "Exception demoting old replica source %s.")
                LOG.exception("Exception demoting old replica source %s.",
                              old_master.id)
                failed.append(old_master)
                failure_lines.append(
                    "%s (%s)\n" % (translated_fmt % old_master.id, ex))

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if not failed:
                return

            self._set_task_status(failed, InstanceTasks.PROMOTION_ERROR)
            summary_fmt = _(
                "promote-to-replica-source %(id)s: The following "
                "replicas may not have been switched: %(replicas)s:"
                "\n%(err)s")
            summary = summary_fmt % {
                "id": master_candidate.id,
                "replicas": [repl.id for repl in failed],
                "err": "".join(failure_lines)}
            raise ReplicationSlaveAttachError(summary)
Example #5
0
        def _promote_to_replica_source(old_master, master_candidate,
                                       replica_models):
            """Promote ``master_candidate`` over ``old_master``.

            The old master is made read-only and the candidate waits for
            its latest transaction before being detached and enabled as
            master. The old master is then attached as a replica of the
            candidate, the two instances exchange public IPs, the other
            replicas are re-pointed at the candidate and finally the old
            master is demoted.

            :param old_master: instance model of the current source.
            :param master_candidate: instance model being promoted.
            :param replica_models: list of replica instance models; the
                entry whose id matches ``master_candidate.id`` is skipped.
            :raises ReplicationSlaveAttachError: if moving a replica raised
                ``exception.TroveError`` or demoting the old master raised.
                The affected instances are set to
                ``InstanceTasks.PROMOTION_ERROR`` before raising.
            """
            # Switch over as quickly as possible to keep the window for
            # unrecoverable errors small.
            old_master.make_read_only(True)
            old_master_ips = old_master.detach_public_ips()
            candidate_ips = master_candidate.detach_public_ips()
            last_txn = old_master.get_latest_txn_id()
            master_candidate.wait_for_txn(last_txn)
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            old_master.attach_replica(master_candidate)
            master_candidate.attach_public_ips(old_master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(candidate_ips)

            # From here on a failure should still leave a working master
            # with some working slaves, plus possibly "orphaned" slaves.

            failed = []          # instance models that ended up in error
            failure_lines = []   # one "<message> (<exception>)\n" per failure
            for replica in replica_models:
                try:
                    if replica.id == master_candidate.id:
                        continue
                    replica.detach_replica(old_master, for_failover=True)
                    replica.attach_replica(master_candidate)
                except exception.TroveError as ex:
                    # Untranslated text goes to the log, translated text
                    # into the exception raised at the end.
                    translated_fmt = _(
                        "Unable to migrate replica %(slave)s from "
                        "old replica source %(old_master)s to "
                        "new source %(new_master)s on promote.")
                    details = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id
                    }
                    LOG.exception(
                        "Unable to migrate replica %(slave)s from "
                        "old replica source %(old_master)s to "
                        "new source %(new_master)s on promote.",
                        details)
                    failed.append(replica)
                    failure_lines.append(
                        "%s (%s)\n" % (translated_fmt % details, ex))

            try:
                old_master.demote_replication_master()
            except Exception as ex:
                translated_fmt = _(
                    "Exception demoting old replica source %s.")
                LOG.exception("Exception demoting old replica source %s.",
                              old_master.id)
                failed.append(old_master)
                failure_lines.append(
                    "%s (%s)\n" % (translated_fmt % old_master.id, ex))

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if not failed:
                return

            self._set_task_status(failed, InstanceTasks.PROMOTION_ERROR)
            summary_fmt = _(
                "promote-to-replica-source %(id)s: The following "
                "replicas may not have been switched: %(replicas)s:"
                "\n%(err)s")
            summary = summary_fmt % {
                "id": master_candidate.id,
                "replicas": [repl.id for repl in failed],
                "err": "".join(failure_lines)
            }
            raise ReplicationSlaveAttachError(summary)