Example #1
0
    def eject_replica_source(self, context, instance_id):
        """Eject the replica source ``instance_id`` from its replica set.

        The most up-to-date replica is selected as the new replication
        source, the remaining replicas are re-pointed at it, and the old
        source is given the freed-up slave public IPs.

        :raises ReplicationSlaveAttachError: if one or more replicas
            could not be switched to the new source.
        """
        def _eject_replica_source(old_master, replica_models):
            """Promote the best candidate and migrate the other replicas."""

            master_candidate = self._most_current_replica(
                old_master, replica_models)

            # Swap public IPs so clients follow the new source, then
            # stand the candidate up as a writable master.
            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(slave_ips)

            # Re-point the remaining replicas, collecting failures so a
            # single bad replica does not abort the rest of the switch.
            exception_replicas = []
            for replica in replica_models:
                try:
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError:
                    msg = _("eject-replica-source: Unable to migrate "
                            "replica %(slave)s from old replica source "
                            "%(old_master)s to new source %(new_master)s.")
                    msg_values = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id
                    }
                    LOG.exception(msg % msg_values)
                    # Keep the instance model (not just its id):
                    # _set_task_status() operates on instance objects,
                    # as the InstanceTasks.NONE call below shows.
                    exception_replicas.append(replica)

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.EJECTION_ERROR)
                msg = _("eject-replica-source %(id)s: The following "
                        "replicas may not have been switched: %(replicas)s")
                msg_values = {
                    "id": master_candidate.id,
                    "replicas": [repl.id for repl in exception_replicas]
                }
                raise ReplicationSlaveAttachError(msg % msg_values)

        with EndNotification(context):
            master = BuiltInstanceTasks.load(context, instance_id)
            replicas = [
                BuiltInstanceTasks.load(context, dbinfo.id)
                for dbinfo in master.slaves
            ]
            try:
                _eject_replica_source(master, replicas)
            except ReplicationSlaveAttachError:
                raise
            except Exception:
                # Unknown failure: flag every instance so operators can
                # inspect and repair the replica set.
                self._set_task_status([master] + replicas,
                                      InstanceTasks.EJECTION_ERROR)
                raise
Example #2
0
    def eject_replica_source(self, context, instance_id):
        """Eject the replica source ``instance_id`` from its replica set.

        The most up-to-date replica is selected as the new replication
        source, the remaining replicas are re-pointed at it, and the old
        source is given the freed-up slave public IPs.

        :raises ReplicationSlaveAttachError: if one or more replicas
            could not be switched to the new source.
        """

        def _eject_replica_source(old_master, replica_models):
            """Promote the best candidate and migrate the other replicas."""

            master_candidate = self._most_current_replica(old_master,
                                                          replica_models)

            # Swap public IPs so clients follow the new source, then
            # stand the candidate up as a writable master.
            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master(for_failover=True)
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(slave_ips)

            # Re-point the remaining replicas, collecting failures so a
            # single bad replica does not abort the rest of the switch.
            exception_replicas = []
            for replica in replica_models:
                try:
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError:
                    msg = _("eject-replica-source: Unable to migrate "
                            "replica %(slave)s from old replica source "
                            "%(old_master)s to new source %(new_master)s.")
                    msg_values = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id
                    }
                    LOG.exception(msg % msg_values)
                    # Keep the instance model (not just its id):
                    # _set_task_status() operates on instance objects,
                    # as the InstanceTasks.NONE call below shows.
                    exception_replicas.append(replica)

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.EJECTION_ERROR)
                msg = _("eject-replica-source %(id)s: The following "
                        "replicas may not have been switched: %(replicas)s")
                msg_values = {
                    "id": master_candidate.id,
                    "replicas": [repl.id for repl in exception_replicas]
                }
                raise ReplicationSlaveAttachError(msg % msg_values)

        with EndNotification(context):
            master = BuiltInstanceTasks.load(context, instance_id)
            replicas = [BuiltInstanceTasks.load(context, dbinfo.id)
                        for dbinfo in master.slaves]
            try:
                _eject_replica_source(master, replicas)
            except ReplicationSlaveAttachError:
                raise
            except Exception:
                # Unknown failure: flag every instance so operators can
                # inspect and repair the replica set.
                self._set_task_status([master] + replicas,
                                      InstanceTasks.EJECTION_ERROR)
                raise
Example #3
0
    def _create_replication_slave(self, context, instance_id, name, flavor,
                                  image_id, databases, users,
                                  datastore_manager, packages, volume_size,
                                  availability_zone, root_password, nics,
                                  overrides, slave_of_id, backup_id,
                                  volume_type, modules):
        """Create one or more replicas of the master ``slave_of_id``.

        ``instance_id`` and ``root_password`` may either be scalars (one
        replica) or parallel lists (several replicas). A snapshot of the
        master is taken for the first replica and reused for the rest;
        any backup created for it is deleted on the way out.

        Creation failures are tolerated after the first replica; failure
        of the first replica aborts the whole operation.
        """
        # Normalize the scalar form so a single code path handles both.
        if isinstance(instance_id, list):
            ids = instance_id
            root_passwords = root_password
        else:
            ids = [instance_id]
            root_passwords = [root_password]
        replica_backup_id = backup_id
        replica_backup_created = False
        replicas = []

        # Replicas share the master's locality via its server group.
        master_instance_tasks = BuiltInstanceTasks.load(context, slave_of_id)
        server_group = master_instance_tasks.server_group
        scheduler_hints = srv_grp.ServerGroup.convert_to_hint(server_group)
        LOG.debug("Using scheduler hints for locality: %s", scheduler_hints)

        try:
            for replica_index, replica_id in enumerate(ids):
                replica_number = replica_index + 1
                try:
                    LOG.debug("Creating replica %(num)d of %(count)d.",
                              {'num': replica_number, 'count': len(ids)})
                    instance_tasks = FreshInstanceTasks.load(
                        context, replica_id)
                    snapshot = instance_tasks.get_replication_master_snapshot(
                        context, slave_of_id, flavor, replica_backup_id,
                        replica_number=replica_number)
                    # Reuse the snapshot's backup for subsequent replicas.
                    replica_backup_id = snapshot['dataset']['snapshot_id']
                    replica_backup_created = (replica_backup_id is not None)
                    instance_tasks.create_instance(
                        flavor, image_id, databases, users, datastore_manager,
                        packages, volume_size, replica_backup_id,
                        availability_zone, root_passwords[replica_index],
                        nics, overrides, None, snapshot, volume_type,
                        modules, scheduler_hints)
                    replicas.append(instance_tasks)
                except Exception:
                    # If the first replica fails we cannot proceed: later
                    # replicas depend on the snapshot it produces.
                    LOG.exception(
                        "Could not create replica %(num)d of %(count)d.",
                        {'num': replica_number, 'count': len(ids)})
                    if replica_number == 1:
                        raise

            for replica in replicas:
                replica.wait_for_instance(CONF.restore_usage_timeout, flavor)

        finally:
            # Remove the temporary backup taken from the master, if any.
            if replica_backup_created:
                Backup.delete(context, replica_backup_id)
Example #4
0
    def _create_replication_slave(self, context, instance_id, name, flavor,
                                  image_id, databases, users,
                                  datastore_manager, packages, volume_size,
                                  availability_zone, root_password, nics,
                                  overrides, slave_of_id, backup_id,
                                  volume_type, modules):
        """Create one or more replicas of the master ``slave_of_id``.

        ``instance_id`` and ``root_password`` may either be scalars (one
        replica) or parallel lists (several replicas). A snapshot of the
        master is taken for the first replica and reused for the rest;
        any backup created for it is deleted on the way out.

        Creation failures are tolerated after the first replica; failure
        of the first replica aborts the whole operation.
        """
        # Normalize the scalar form so a single code path handles both.
        if isinstance(instance_id, list):
            ids = instance_id
            root_passwords = root_password
        else:
            ids = [instance_id]
            root_passwords = [root_password]
        replica_backup_id = backup_id
        replica_backup_created = False
        replicas = []

        # Replicas share the master's locality via its server group.
        master_instance_tasks = BuiltInstanceTasks.load(context, slave_of_id)
        server_group = master_instance_tasks.server_group
        scheduler_hints = srv_grp.ServerGroup.convert_to_hint(server_group)
        # Lazy %-style args: the message is only rendered if emitted.
        LOG.debug("Using scheduler hints for locality: %s", scheduler_hints)

        try:
            for replica_index, replica_id in enumerate(ids):
                replica_number = replica_index + 1
                try:
                    LOG.debug("Creating replica %(num)d of %(count)d.",
                              {'num': replica_number, 'count': len(ids)})
                    instance_tasks = FreshInstanceTasks.load(
                        context, replica_id)
                    snapshot = instance_tasks.get_replication_master_snapshot(
                        context, slave_of_id, flavor, replica_backup_id,
                        replica_number=replica_number)
                    # Reuse the snapshot's backup for subsequent replicas.
                    replica_backup_id = snapshot['dataset']['snapshot_id']
                    replica_backup_created = (replica_backup_id is not None)
                    instance_tasks.create_instance(
                        flavor, image_id, databases, users, datastore_manager,
                        packages, volume_size, replica_backup_id,
                        availability_zone, root_passwords[replica_index],
                        nics, overrides, None, snapshot, volume_type,
                        modules, scheduler_hints)
                    replicas.append(instance_tasks)
                except Exception:
                    # If the first replica fails we cannot proceed: later
                    # replicas depend on the snapshot it produces.
                    LOG.exception(
                        "Could not create replica %(num)d of %(count)d.",
                        {'num': replica_number, 'count': len(ids)})
                    if replica_number == 1:
                        raise

            for replica in replicas:
                replica.wait_for_instance(CONF.restore_usage_timeout, flavor)

        finally:
            # Remove the temporary backup taken from the master, if any.
            if replica_backup_created:
                Backup.delete(context, replica_backup_id)
Example #5
0
 def detach_replica(self, context, instance_id):
     """Detach replica ``instance_id`` from its replication source.

     When the datastore requires post-processing on the master after a
     topology change, the master is refreshed with the replication
     details of its remaining slaves.
     """
     with EndNotification(context):
         replica = models.BuiltInstanceTasks.load(context, instance_id)
         source = models.BuiltInstanceTasks.load(context,
                                                 replica.slave_of_id)
         replica.detach_replica(source)
         if not source.post_processing_required_for_replication():
             return
         remaining = [
             BuiltInstanceTasks.load(context, model.id)
             for model in source.slaves
         ]
         details = [inst.get_replication_detail() for inst in remaining]
         source.complete_master_setup(details)
Example #6
0
 def detach_replica(self, context, instance_id):
     """Detach replica ``instance_id`` from its master.

     Runs post-detach master setup when the datastore requires it,
     feeding it the replication details of the master's current slaves.
     """
     with EndNotification(context):
         slave = models.BuiltInstanceTasks.load(context, instance_id)
         master = models.BuiltInstanceTasks.load(context,
                                                 slave.slave_of_id)
         slave.detach_replica(master)
         if master.post_processing_required_for_replication():
             details = []
             for slave_model in master.slaves:
                 instance = BuiltInstanceTasks.load(context,
                                                    slave_model.id)
                 details.append(instance.get_replication_detail())
             master.complete_master_setup(details)
    def _get_instance_task(self):
        """Build a ``BuiltInstanceTasks`` fixture backed by fake objects.

        Creates a placeholder ``DBInstance`` and fake server/status,
        stubs ``inst_models.load_instance`` to return the fixture, then
        loads and returns it via ``BuiltInstanceTasks.load``.
        """
        dbinst = DBInstance(InstanceTasks.NONE, name='name',
                            created='created',
                            compute_instance_id='compute_instance_id',
                            task_id='task_id',
                            task_description='task_description',
                            task_start_time='task_start_time',
                            volume_id='volume_id',
                            deleted='deleted',
                            tenant_id='tenant_id',
                            service_type='service_type')
        server = fake()
        service_status = fake()
        service_status.status = ServiceStatuses.RUNNING
        inst = BuiltInstanceTasks(self.context, dbinst, server,
                                  service_status)
        # Stub instance loading so BuiltInstanceTasks.load() gets 'inst'.
        when(inst_models).load_instance(
            any(), any(), any(), needs_server=any()).thenReturn(inst)
        return BuiltInstanceTasks.load(self.context, "instance_id")
Example #8
0
    def promote_to_replica_source(self, context, instance_id):
        """Promote replica ``instance_id`` to be the replication source.

        The old source is demoted and reattached as a replica of the
        promoted instance; the remaining replicas are re-pointed at it.

        :raises ReplicationSlaveAttachError: if any instance could not
            be switched over.
        """
        # TODO(atomic77) Promote and eject need to be able to handle the case
        # where a datastore like Postgresql needs to treat the slave to be
        # promoted differently from the old master and the slaves which will
        # be simply reassigned to a new master. See:
        # https://bugs.launchpad.net/trove/+bug/1553339

        def _promote_to_replica_source(old_master, master_candidate,
                                       replica_models):
            # First, we transition from the old master to new as quickly as
            # possible to minimize the scope of unrecoverable error
            old_master.make_read_only(True)
            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            # Wait until the candidate has caught up with the (now
            # read-only) master before switching roles.
            latest_txn_id = old_master.get_latest_txn_id()
            master_candidate.wait_for_txn(latest_txn_id)
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            old_master.attach_replica(master_candidate)
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(slave_ips)

            # At this point, should something go wrong, there
            # should be a working master with some number of working slaves,
            # and possibly some number of "orphaned" slaves

            # Re-point each remaining replica, accumulating failures so
            # one bad replica does not stop the others from switching.
            exception_replicas = []
            error_messages = ""
            for replica in replica_models:
                try:
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError as ex:
                    msg = (_("Unable to migrate replica %(slave)s from "
                             "old replica source %(old_master)s to "
                             "new source %(new_master)s on promote.") % {
                                 "slave": replica.id,
                                 "old_master": old_master.id,
                                 "new_master": master_candidate.id
                             })
                    LOG.exception(msg)
                    exception_replicas.append(replica)
                    error_messages += "%s (%s)\n" % (msg, ex)

            # Demotion failure is reported the same way as a failed
            # replica switch, with the old master in the error list.
            try:
                old_master.demote_replication_master()
            except Exception as ex:
                msg = (_("Exception demoting old replica source %s.") %
                       old_master.id)
                LOG.exception(msg)
                exception_replicas.append(old_master)
                error_messages += "%s (%s)\n" % (msg, ex)

            # Clear task state everywhere, then flag only the instances
            # that failed to switch.
            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.PROMOTION_ERROR)
                msg = (
                    _("promote-to-replica-source %(id)s: The following "
                      "replicas may not have been switched: %(replicas)s:"
                      "\n%(err)s") % {
                          "id": master_candidate.id,
                          "replicas": [repl.id for repl in exception_replicas],
                          "err": error_messages
                      })
                raise ReplicationSlaveAttachError(msg)

        with EndNotification(context):
            master_candidate = BuiltInstanceTasks.load(context, instance_id)
            old_master = BuiltInstanceTasks.load(context,
                                                 master_candidate.slave_of_id)
            # The candidate itself appears in the master's slave list;
            # reuse the already-loaded model for it.
            replicas = []
            for replica_dbinfo in old_master.slaves:
                if replica_dbinfo.id == instance_id:
                    replica = master_candidate
                else:
                    replica = BuiltInstanceTasks.load(context,
                                                      replica_dbinfo.id)
                replicas.append(replica)

            try:
                _promote_to_replica_source(old_master, master_candidate,
                                           replicas)
            except ReplicationSlaveAttachError:
                raise
            except Exception:
                # Unknown failure: flag every instance so operators can
                # inspect and repair the replica set.
                self._set_task_status([old_master] + replicas,
                                      InstanceTasks.PROMOTION_ERROR)
                raise
Example #9
0
    def promote_to_replica_source(self, context, instance_id):
        """Promote replica ``instance_id`` to be the replication source.

        The old source is demoted and reattached as a replica of the
        newly promoted instance; all other replicas are re-pointed at it.

        :raises ReplicationSlaveAttachError: if any instance could not
            be switched over.
        """
        def _promote_to_replica_source(old_master, master_candidate,
                                       replica_models):
            # First, we transition from the old master to new as quickly as
            # possible to minimize the scope of unrecoverable error
            old_master.make_read_only(True)
            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            # Wait until the candidate has caught up with the (now
            # read-only) master before switching roles.
            latest_txn_id = old_master.get_latest_txn_id()
            master_candidate.wait_for_txn(latest_txn_id)
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            old_master.attach_replica(master_candidate)
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(slave_ips)

            # At this point, should something go wrong, there
            # should be a working master with some number of working slaves,
            # and possibly some number of "orphaned" slaves

            # Re-point each remaining replica, accumulating failures so
            # one bad replica does not stop the others from switching.
            exception_replicas = []
            for replica in replica_models:
                try:
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError:
                    msg = _("promote-to-replica-source: Unable to migrate "
                            "replica %(slave)s from old replica source "
                            "%(old_master)s to new source %(new_master)s.")
                    msg_values = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id
                    }
                    LOG.exception(msg % msg_values)
                    exception_replicas.append(replica)

            try:
                old_master.demote_replication_master()
            except Exception:
                LOG.exception(_("Exception demoting old replica source"))
                exception_replicas.append(old_master)

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.PROMOTION_ERROR)
                msg = _("promote-to-replica-source %(id)s: The following "
                        "replicas may not have been switched: %(replicas)s")
                msg_values = {
                    "id": master_candidate.id,
                    # Report instance ids, not instance object reprs.
                    "replicas": [repl.id for repl in exception_replicas]
                }
                raise ReplicationSlaveAttachError(msg % msg_values)

        with EndNotification(context):
            master_candidate = BuiltInstanceTasks.load(context, instance_id)
            old_master = BuiltInstanceTasks.load(context,
                                                 master_candidate.slave_of_id)
            # The candidate itself appears in the master's slave list;
            # reuse the already-loaded model for it.
            replicas = []
            for replica_dbinfo in old_master.slaves:
                if replica_dbinfo.id == instance_id:
                    replica = master_candidate
                else:
                    replica = BuiltInstanceTasks.load(context,
                                                      replica_dbinfo.id)
                replicas.append(replica)

            try:
                _promote_to_replica_source(old_master, master_candidate,
                                           replicas)
            except ReplicationSlaveAttachError:
                raise
            except Exception:
                # Unknown failure: flag every instance so operators can
                # inspect and repair the replica set.
                self._set_task_status([old_master] + replicas,
                                      InstanceTasks.PROMOTION_ERROR)
                raise
Example #10
0
    def eject_replica_source(self, context, instance_id):
        """Eject the replica source ``instance_id`` from its replica set.

        The most up-to-date replica is selected as the new replication
        source, the remaining replicas are re-pointed at it, and the old
        source is given the freed-up slave public IPs.

        :raises ReplicationSlaveAttachError: if one or more replicas
            could not be switched to the new source.
        """

        def _eject_replica_source(old_master, replica_models):
            """Promote the best candidate and migrate the other replicas."""

            master_candidate = self._most_current_replica(old_master,
                                                          replica_models)

            # Swap public IPs so clients follow the new source, then
            # stand the candidate up as a writable master.
            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            master_candidate.detach_replica(old_master, for_failover=True,
                                            for_promote=True)
            master_candidate.enable_as_master()
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(slave_ips)

            # Re-point the remaining replicas, collecting failures so a
            # single bad replica does not abort the rest of the switch.
            exception_replicas = []
            error_messages = ""
            for replica in replica_models:
                try:
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError as ex:
                    msg = (_("Unable to migrate replica %(slave)s from "
                             "old replica source %(old_master)s to "
                             "new source %(new_master)s on eject.") %
                           {"slave": replica.id,
                            "old_master": old_master.id,
                            "new_master": master_candidate.id})
                    LOG.exception(msg)
                    exception_replicas.append(replica)
                    error_messages += "%s (%s)\n" % (msg, ex)

            # Some datastores need extra setup on the new master once
            # its slave set is known.
            if master_candidate.post_processing_required_for_replication():
                new_slaves = list(replica_models)
                new_slaves.remove(master_candidate)
                new_slaves_detail = [slave.get_replication_detail()
                                     for slave in new_slaves]
                master_candidate.complete_master_setup(new_slaves_detail)

            # Clear task state everywhere, then flag only the replicas
            # that failed to switch.
            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.EJECTION_ERROR)
                msg = (_("eject-replica-source %(id)s: The following "
                         "replicas may not have been switched: %(replicas)s") %
                       {"id": master_candidate.id,
                        "replicas": [repl.id for repl in exception_replicas]})
                raise ReplicationSlaveAttachError("%s:\n%s" %
                                                  (msg, error_messages))

        with EndNotification(context):
            master = BuiltInstanceTasks.load(context, instance_id)
            replicas = [BuiltInstanceTasks.load(context, dbinfo.id)
                        for dbinfo in master.slaves]
            try:
                _eject_replica_source(master, replicas)
            except ReplicationSlaveAttachError:
                raise
            except Exception:
                # Unknown failure: flag every instance so operators can
                # inspect and repair the replica set.
                self._set_task_status([master] + replicas,
                                      InstanceTasks.EJECTION_ERROR)
                raise
Example #11
0
    def promote_to_replica_source(self, context, instance_id):
        """Promote replica ``instance_id`` to be the replication source.

        The other replicas are re-pointed at the promoted instance
        first; the old source is then reattached as a replica and
        demoted (see the ordering NOTE in the inner function).

        :raises ReplicationSlaveAttachError: if any instance could not
            be switched over.
        """
        # TODO(atomic77) Promote and eject need to be able to handle the case
        # where a datastore like Postgresql needs to treat the slave to be
        # promoted differently from the old master and the slaves which will
        # be simply reassigned to a new master. See:
        # https://bugs.launchpad.net/trove/+bug/1553339

        def _promote_to_replica_source(old_master, master_candidate,
                                       replica_models):
            # First, we transition from the old master to new as quickly as
            # possible to minimize the scope of unrecoverable error

            # NOTE(zhaochao): we cannot reattach the old master to the new
            # one immediately after the new master is up, because for MariaDB
            # the other replicas are still connecting to the old master, and
            # during reattaching the old master as a slave, new GTID may be
            # created and synced to the replicas. After that, when attaching
            # the replicas to the new master, 'START SLAVE' will fail by
            # 'fatal error 1236' if the binlog of the replica diverged from
            # the new master. So the proper order should be:
            # -1. make the old master read only (and detach floating ips)
            # -2. make sure the new master is up-to-date
            # -3. detach the new master from the old one
            # -4. enable the new master (and attach floating ips)
            # -5. attach the other replicas to the new master
            # -6. attach the old master to the new one
            #     (and attach floating ips)
            # -7. demote the old master
            # What we changed here is the order of the 6th step, previously
            # this step took place right after step 4, which causes failures
            # with MariaDB replications.
            old_master.make_read_only(True)
            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            latest_txn_id = old_master.get_latest_txn_id()
            master_candidate.wait_for_txn(latest_txn_id)
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)

            # At this point, should something go wrong, there
            # should be a working master with some number of working slaves,
            # and possibly some number of "orphaned" slaves

            # Re-point each remaining replica, accumulating failures so
            # one bad replica does not stop the others from switching.
            exception_replicas = []
            error_messages = ""
            for replica in replica_models:
                try:
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError as ex:
                    log_fmt = ("Unable to migrate replica %(slave)s from "
                               "old replica source %(old_master)s to "
                               "new source %(new_master)s on promote.")
                    exc_fmt = _("Unable to migrate replica %(slave)s from "
                                "old replica source %(old_master)s to "
                                "new source %(new_master)s on promote.")
                    msg_content = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id}
                    LOG.exception(log_fmt, msg_content)
                    exception_replicas.append(replica)
                    error_messages += "%s (%s)\n" % (
                        exc_fmt % msg_content, ex)

            # dealing with the old master after all the other replicas
            # has been migrated.
            old_master.attach_replica(master_candidate)
            old_master.attach_public_ips(slave_ips)
            try:
                old_master.demote_replication_master()
            except Exception as ex:
                log_fmt = "Exception demoting old replica source %s."
                exc_fmt = _("Exception demoting old replica source %s.")
                LOG.exception(log_fmt, old_master.id)
                exception_replicas.append(old_master)
                error_messages += "%s (%s)\n" % (
                    exc_fmt % old_master.id, ex)

            # Clear task state everywhere, then flag only the instances
            # that failed to switch.
            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.PROMOTION_ERROR)
                msg = (_("promote-to-replica-source %(id)s: The following "
                         "replicas may not have been switched: %(replicas)s:"
                         "\n%(err)s") %
                       {"id": master_candidate.id,
                        "replicas": [repl.id for repl in exception_replicas],
                        "err": error_messages})
                raise ReplicationSlaveAttachError(msg)

        with EndNotification(context):
            master_candidate = BuiltInstanceTasks.load(context, instance_id)
            old_master = BuiltInstanceTasks.load(context,
                                                 master_candidate.slave_of_id)
            # The candidate itself appears in the master's slave list;
            # reuse the already-loaded model for it.
            replicas = []
            for replica_dbinfo in old_master.slaves:
                if replica_dbinfo.id == instance_id:
                    replica = master_candidate
                else:
                    replica = BuiltInstanceTasks.load(context,
                                                      replica_dbinfo.id)
                replicas.append(replica)

            try:
                _promote_to_replica_source(old_master, master_candidate,
                                           replicas)
            except ReplicationSlaveAttachError:
                raise
            except Exception:
                # Unknown failure: flag every instance so operators can
                # inspect and repair the replica set.
                self._set_task_status([old_master] + replicas,
                                      InstanceTasks.PROMOTION_ERROR)
                raise
Example #12
0
    def promote_to_replica_source(self, context, instance_id):
        """Promote replica ``instance_id`` to be the new replication master.

        The old master is demoted to a replica of the promoted instance and
        all sibling replicas are re-pointed at the new master.  Instances
        that could not be switched are left in PROMOTION_ERROR status and
        reported via ReplicationSlaveAttachError.

        :param context: the request context (also scopes the end
            notification).
        :param instance_id: id of the replica instance to promote.
        :raises ReplicationSlaveAttachError: if one or more instances could
            not be re-attached to the new master.
        """
        # TODO(atomic77) Promote and eject need to be able to handle the case
        # where a datastore like Postgresql needs to treat the slave to be
        # promoted differently from the old master and the slaves which will
        # be simply reassigned to a new master. See:
        # https://bugs.launchpad.net/trove/+bug/1553339

        def _promote_to_replica_source(old_master, master_candidate,
                                       replica_models):
            # First, we transition from the old master to new as quickly as
            # possible to minimize the scope of unrecoverable error
            old_master.make_read_only(True)
            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            # Make sure the candidate has replayed everything the old master
            # committed before the roles are swapped.
            latest_txn_id = old_master.get_latest_txn_id()
            master_candidate.wait_for_txn(latest_txn_id)
            old_master.pre_replication_demote()
            master_candidate.detach_replica(old_master, for_failover=True,
                                            for_promote=True)
            master_candidate.enable_as_master()
            old_master.attach_replica(master_candidate)
            # Swap the public endpoints so clients now reach the new master.
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(slave_ips)

            # At this point, should something go wrong, there
            # should be a working master with some number of working slaves,
            # and possibly some number of "orphaned" slaves

            exception_replicas = []
            error_messages = ""
            for replica in replica_models:
                try:
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError as ex:
                    # Keep going: collect failures so every replica gets a
                    # chance to be re-attached to the new master.
                    msg = (_("Unable to migrate replica %(slave)s from "
                             "old replica source %(old_master)s to "
                             "new source %(new_master)s on promote.") %
                           {"slave": replica.id,
                            "old_master": old_master.id,
                            "new_master": master_candidate.id})
                    LOG.exception(msg)
                    exception_replicas.append(replica)
                    error_messages += "%s (%s)\n" % (msg, ex)

            # Demotion failures are reported the same way as replica
            # migration failures.
            try:
                old_master.demote_replication_master()
            except Exception as ex:
                msg = (_("Exception demoting old replica source %s.") %
                       old_master.id)
                LOG.exception(msg)
                exception_replicas.append(old_master)
                error_messages += "%s (%s)\n" % (msg, ex)

            # Some datastores need a final setup pass on the new master once
            # the full replica topology (including the demoted master) is
            # known.
            if master_candidate.post_processing_required_for_replication():
                new_slaves = list(replica_models)
                new_slaves.remove(master_candidate)
                new_slaves.append(old_master)
                new_slaves_detail = [slave.get_replication_detail()
                                     for slave in new_slaves]
                master_candidate.complete_master_setup(new_slaves_detail)

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.PROMOTION_ERROR)
                msg = (_("promote-to-replica-source %(id)s: The following "
                         "replicas may not have been switched: %(replicas)s:"
                         "\n%(err)s") %
                       {"id": master_candidate.id,
                        "replicas": [repl.id for repl in exception_replicas],
                        "err": error_messages})
                raise ReplicationSlaveAttachError(msg)

        with EndNotification(context):
            master_candidate = BuiltInstanceTasks.load(context, instance_id)
            old_master = BuiltInstanceTasks.load(context,
                                                 master_candidate.slave_of_id)
            # The promoted instance appears in the old master's slave list;
            # reuse the already-loaded model for it.
            replicas = []
            for replica_dbinfo in old_master.slaves:
                if replica_dbinfo.id == instance_id:
                    replica = master_candidate
                else:
                    replica = BuiltInstanceTasks.load(context,
                                                      replica_dbinfo.id)
                replicas.append(replica)

            try:
                _promote_to_replica_source(old_master, master_candidate,
                                           replicas)
            except ReplicationSlaveAttachError:
                raise
            except Exception:
                # Unexpected failure: mark the whole set as errored since the
                # topology may have been left partially switched.
                self._set_task_status([old_master] + replicas,
                                      InstanceTasks.PROMOTION_ERROR)
                raise
Example #13
0
    def eject_replica_source(self, context, instance_id):
        """Eject the (presumed failed) master ``instance_id``.

        The most up-to-date replica is promoted to master and the remaining
        replicas are re-pointed at it.  Replicas that could not be switched
        are set to EJECTION_ERROR and reported via
        ReplicationSlaveAttachError.

        :param context: the request context (also scopes the end
            notification).
        :param instance_id: id of the master instance being ejected.
        :raises ReplicationSlaveAttachError: if one or more replicas could
            not be re-attached to the new master.
        """

        def _eject_replica_source(old_master, replica_models):

            # Pick the replica with the most recent transactions as the new
            # master.
            master_candidate = self._most_current_replica(old_master,
                                                          replica_models)

            # Swap public endpoints and replication roles.
            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(slave_ips)

            exception_replicas = []
            error_messages = ""
            for replica in replica_models:
                try:
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError as ex:
                    # Keep going so every replica gets a chance to be
                    # re-attached; failures are accumulated and reported
                    # after the loop.
                    log_fmt = ("Unable to migrate replica %(slave)s from "
                               "old replica source %(old_master)s to "
                               "new source %(new_master)s on eject.")
                    exc_fmt = _("Unable to migrate replica %(slave)s from "
                                "old replica source %(old_master)s to "
                                "new source %(new_master)s on eject.")
                    msg_content = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id}
                    LOG.exception(log_fmt, msg_content)
                    exception_replicas.append(replica)
                    error_messages += "%s (%s)\n" % (
                        exc_fmt % msg_content, ex)

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.EJECTION_ERROR)
                msg = (_("eject-replica-source %(id)s: The following "
                         "replicas may not have been switched: %(replicas)s:"
                         "\n%(err)s") %
                       {"id": master_candidate.id,
                        "replicas": [repl.id for repl in exception_replicas],
                        "err": error_messages})
                raise ReplicationSlaveAttachError(msg)

        with EndNotification(context):
            master = BuiltInstanceTasks.load(context, instance_id)
            replicas = [BuiltInstanceTasks.load(context, dbinfo.id)
                        for dbinfo in master.slaves]
            try:
                _eject_replica_source(master, replicas)
            except ReplicationSlaveAttachError:
                raise
            except Exception:
                # Unexpected failure: mark the whole set as errored since the
                # topology may have been left partially switched.
                self._set_task_status([master] + replicas,
                                      InstanceTasks.EJECTION_ERROR)
                raise
Example #14
0
    def promote_to_replica_source(self, context, instance_id):
        """Promote replica ``instance_id`` to be the new replication master.

        The old master becomes a replica of the promoted instance, and all
        sibling replicas are re-pointed at it.  Instances that could not be
        switched are set to PROMOTION_ERROR and reported via
        ReplicationSlaveAttachError.

        :param context: the request context.
        :param instance_id: id of the replica instance to promote.
        :raises ReplicationSlaveAttachError: if one or more instances could
            not be re-attached to the new master.
        """

        def _promote_to_replica_source(old_master, master_candidate,
                                       replica_models):
            # First, we transition from the old master to new as quickly as
            # possible to minimize the scope of unrecoverable error
            old_master.make_read_only(True)
            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            # Make sure the candidate has replayed everything the old master
            # committed before the roles are swapped.
            latest_txn_id = old_master.get_latest_txn_id()
            master_candidate.wait_for_txn(latest_txn_id)
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            old_master.attach_replica(master_candidate)
            # Swap the public endpoints so clients now reach the new master.
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(slave_ips)

            # At this point, should something go wrong, there
            # should be a working master with some number of working slaves,
            # and possibly some number of "orphaned" slaves

            exception_replicas = []
            for replica in replica_models:
                try:
                    replica.wait_for_txn(latest_txn_id)
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError:
                    msg = _("promote-to-replica-source: Unable to migrate "
                            "replica %(slave)s from old replica source "
                            "%(old_master)s to new source %(new_master)s.")
                    msg_values = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id
                    }
                    LOG.exception(msg % msg_values)
                    # Collect the instance model (not just its id) so the
                    # list stays homogeneous: the demotion failure path below
                    # appends the old_master model object, and both end up in
                    # the same _set_task_status() call.
                    exception_replicas.append(replica)

            try:
                old_master.demote_replication_master()
            except Exception:
                LOG.exception(_("Exception demoting old replica source"))
                exception_replicas.append(old_master)

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.PROMOTION_ERROR)
                msg = _("promote-to-replica-source %(id)s: The following "
                        "replicas may not have been switched: %(replicas)s")
                msg_values = {
                    "id": master_candidate.id,
                    # Render ids, since exception_replicas now holds models.
                    "replicas": [repl.id for repl in exception_replicas]
                }
                raise ReplicationSlaveAttachError(msg % msg_values)

        master_candidate = BuiltInstanceTasks.load(context, instance_id)
        old_master = BuiltInstanceTasks.load(context,
                                             master_candidate.slave_of_id)
        # The promoted instance appears in the old master's slave list;
        # reuse the already-loaded model for it.
        replicas = []
        for replica_dbinfo in old_master.slaves:
            if replica_dbinfo.id == instance_id:
                replica = master_candidate
            else:
                replica = BuiltInstanceTasks.load(context, replica_dbinfo.id)
            replicas.append(replica)

        try:
            _promote_to_replica_source(old_master, master_candidate, replicas)
        except ReplicationSlaveAttachError:
            raise
        except Exception:
            # Unexpected failure: mark the whole set as errored since the
            # topology may have been left partially switched.
            self._set_task_status([old_master] + replicas,
                                  InstanceTasks.PROMOTION_ERROR)
            raise
Example #15
0
    def eject_replica_source(self, context, instance_id):
        """Eject the (presumed failed) master ``instance_id``.

        The replica with the highest transaction count is promoted to master
        and the remaining replicas are re-pointed at it.  Replicas that could
        not be switched are set to EJECTION_ERROR and reported via
        ReplicationSlaveAttachError.

        :param context: the request context.
        :param instance_id: id of the master instance being ejected.
        :raises ReplicationSlaveAttachError: if one or more replicas could
            not be re-attached to the new master.
        """

        def _eject_replica_source(old_master, replica_models):

            # Select the slave with the greatest number of transactions to
            # be the new master.
            # TODO(mwj): Replace this heuristic with code to store the
            # site id of the master then use it to determine which slave
            # has the most recent txn from that master.
            # NOTE(review): if no replica reports txn_count > 0,
            # master_candidate stays None and detach_public_ips() below
            # raises AttributeError — confirm callers guarantee at least
            # one reachable replica with transactions.
            master_candidate = None
            max_txn_count = 0
            for replica in replica_models:
                txn_count = replica.get_txn_count()
                if txn_count > max_txn_count:
                    master_candidate = replica
                    max_txn_count = txn_count

            # Swap public endpoints and replication roles.
            master_ips = old_master.detach_public_ips()
            slave_ips = master_candidate.detach_public_ips()
            master_candidate.detach_replica(old_master, for_failover=True)
            master_candidate.enable_as_master()
            master_candidate.attach_public_ips(master_ips)
            master_candidate.make_read_only(False)
            old_master.attach_public_ips(slave_ips)

            exception_replicas = []
            for replica in replica_models:
                try:
                    if replica.id != master_candidate.id:
                        replica.detach_replica(old_master, for_failover=True)
                        replica.attach_replica(master_candidate)
                except exception.TroveError:
                    # Keep going so every replica gets a chance to be
                    # re-attached; failures are accumulated and reported
                    # after the loop.
                    msg = _("eject-replica-source: Unable to migrate "
                            "replica %(slave)s from old replica source "
                            "%(old_master)s to new source %(new_master)s.")
                    msg_values = {
                        "slave": replica.id,
                        "old_master": old_master.id,
                        "new_master": master_candidate.id
                    }
                    LOG.exception(msg % msg_values)
                    # NOTE(review): replica *ids* are collected here, while
                    # sibling implementations in this file collect instance
                    # models — verify _set_task_status accepts ids.
                    exception_replicas.append(replica.id)

            self._set_task_status([old_master] + replica_models,
                                  InstanceTasks.NONE)
            if exception_replicas:
                self._set_task_status(exception_replicas,
                                      InstanceTasks.EJECTION_ERROR)
                msg = _("eject-replica-source %(id)s: The following "
                        "replicas may not have been switched: %(replicas)s")
                msg_values = {
                    "id": master_candidate.id,
                    "replicas": exception_replicas
                }
                raise ReplicationSlaveAttachError(msg % msg_values)

        master = BuiltInstanceTasks.load(context, instance_id)
        replicas = [BuiltInstanceTasks.load(context, dbinfo.id)
                    for dbinfo in master.slaves]
        try:
            _eject_replica_source(master, replicas)
        except ReplicationSlaveAttachError:
            raise
        except Exception:
            # Unexpected failure: mark the whole set as errored since the
            # topology may have been left partially switched.
            self._set_task_status([master] + replicas,
                                  InstanceTasks.EJECTION_ERROR)
            raise
Example #16
0
    def _create_replication_slave(self, context, instance_id, name, flavor,
                                  image_id, databases, users,
                                  datastore_manager, packages, volume_size,
                                  availability_zone, root_password, nics,
                                  overrides, slave_of_id, backup_id,
                                  volume_type, modules):
        """Create one or more replicas of the master ``slave_of_id``.

        ``instance_id`` and ``root_password`` may be scalars (single
        replica) or parallel lists (bulk create).  A snapshot of the master
        is taken for the first replica and its backup is reused for the
        rest; any backup created along the way is deleted on exit.

        :raises: re-raises any failure creating the *first* replica;
            failures on later replicas are logged and skipped.
        """
        # Normalize scalar arguments to parallel lists.
        if isinstance(instance_id, list):
            ids = instance_id
            root_passwords = root_password
        else:
            ids = [instance_id]
            root_passwords = [root_password]
        replica_number = 0
        replica_backup_id = backup_id
        replica_backup_created = False
        replicas = []

        # Place replicas according to the master's server group (locality).
        master_instance_tasks = BuiltInstanceTasks.load(context, slave_of_id)
        server_group = master_instance_tasks.server_group
        scheduler_hints = srv_grp.ServerGroup.convert_to_hint(server_group)
        # Lazy %-args: the message is only rendered if debug is enabled.
        LOG.debug("Using scheduler hints for locality: %s", scheduler_hints)

        try:
            for replica_index in range(len(ids)):
                try:
                    replica_number += 1
                    LOG.debug("Creating replica %d of %d.",
                              replica_number, len(ids))
                    instance_tasks = FreshInstanceTasks.load(
                        context, ids[replica_index])
                    snapshot = instance_tasks.get_replication_master_snapshot(
                        context,
                        slave_of_id,
                        flavor,
                        replica_backup_id,
                        replica_number=replica_number)
                    # Reuse the snapshot's backup for subsequent replicas.
                    replica_backup_id = snapshot['dataset']['snapshot_id']
                    replica_backup_created = (replica_backup_id is not None)
                    instance_tasks.create_instance(
                        flavor, image_id, databases, users, datastore_manager,
                        packages, volume_size, replica_backup_id,
                        availability_zone, root_passwords[replica_index], nics,
                        overrides, None, snapshot, volume_type, modules,
                        scheduler_hints)
                    replicas.append(instance_tasks)
                except Exception:
                    # if it's the first replica, then we shouldn't continue
                    LOG.exception(
                        _("Could not create replica %(num)d of %(count)d.") % {
                            'num': replica_number,
                            'count': len(ids)
                        })
                    if replica_number == 1:
                        raise

            for replica in replicas:
                replica.wait_for_instance(CONF.restore_usage_timeout, flavor)

            # Some datastores requires completing configuration of replication
            # nodes with information that is only available after all the
            # instances has been started.
            if (master_instance_tasks.post_processing_required_for_replication(
            )):
                slave_instances = [
                    BuiltInstanceTasks.load(context, slave.id)
                    for slave in master_instance_tasks.slaves
                ]

                # Collect info from each slave post instance launch
                slave_detail = [
                    slave_instance.get_replication_detail()
                    for slave_instance in slave_instances
                ]

                # Pass info of all replication nodes to the master for
                # replication setup completion
                master_detail = master_instance_tasks.get_replication_detail()
                master_instance_tasks.complete_master_setup(slave_detail)

                # Pass info of all replication nodes to each slave for
                # replication setup completion
                for slave_instance in slave_instances:
                    slave_instance.complete_slave_setup(
                        master_detail, slave_detail)

                # Push pending data/transactions from master to slaves
                master_instance_tasks.sync_data_to_slaves()

                # Set the status of all slave nodes to ACTIVE
                for slave_instance in slave_instances:
                    slave_guest = remote.create_guest_client(
                        slave_instance.context, slave_instance.db_info.id,
                        slave_instance.datastore_version.manager)
                    slave_guest.cluster_complete()

        finally:
            # Remove the intermediate backup regardless of outcome.
            if replica_backup_created:
                Backup.delete(context, replica_backup_id)
Example #17
0
    def _create_replication_slave(self, context, instance_id, name, flavor,
                                  image_id, databases, users,
                                  datastore_manager, packages, volume_size,
                                  availability_zone, root_password, nics,
                                  overrides, slave_of_id, backup_id,
                                  volume_type, modules):
        """Create one or more replicas of the master ``slave_of_id``.

        ``instance_id`` and ``root_password`` may be scalars (single
        replica) or parallel lists (bulk create).  A snapshot of the master
        is taken for the first replica and its backup is reused for the
        rest; any backup created along the way is deleted on exit.

        :raises: re-raises any failure creating the *first* replica;
            failures on later replicas are logged and skipped.
        """
        # Normalize scalar arguments to parallel lists.
        if isinstance(instance_id, list):
            ids = instance_id
            root_passwords = root_password
        else:
            ids = [instance_id]
            root_passwords = [root_password]
        replica_number = 0
        replica_backup_id = backup_id
        replica_backup_created = False
        replicas = []

        # Place replicas according to the master's server group (locality).
        master_instance_tasks = BuiltInstanceTasks.load(context, slave_of_id)
        server_group = master_instance_tasks.server_group
        scheduler_hints = srv_grp.ServerGroup.convert_to_hint(server_group)
        # Lazy %-args: the message is only rendered if debug is enabled.
        LOG.debug("Using scheduler hints for locality: %s", scheduler_hints)

        try:
            for replica_index in range(len(ids)):
                try:
                    replica_number += 1
                    LOG.debug("Creating replica %d of %d.",
                              replica_number, len(ids))
                    instance_tasks = FreshInstanceTasks.load(
                        context, ids[replica_index])
                    snapshot = instance_tasks.get_replication_master_snapshot(
                        context, slave_of_id, flavor, replica_backup_id,
                        replica_number=replica_number)
                    # Reuse the snapshot's backup for subsequent replicas.
                    replica_backup_id = snapshot['dataset']['snapshot_id']
                    replica_backup_created = (replica_backup_id is not None)
                    instance_tasks.create_instance(
                        flavor, image_id, databases, users, datastore_manager,
                        packages, volume_size, replica_backup_id,
                        availability_zone, root_passwords[replica_index],
                        nics, overrides, None, snapshot, volume_type,
                        modules, scheduler_hints)
                    replicas.append(instance_tasks)
                except Exception:
                    # if it's the first replica, then we shouldn't continue
                    LOG.exception(_(
                        "Could not create replica %(num)d of %(count)d.")
                        % {'num': replica_number, 'count': len(ids)})
                    if replica_number == 1:
                        raise

            for replica in replicas:
                replica.wait_for_instance(CONF.restore_usage_timeout, flavor)

            # Some datastores requires completing configuration of replication
            # nodes with information that is only available after all the
            # instances has been started.
            if (master_instance_tasks
                    .post_processing_required_for_replication()):
                slave_instances = [BuiltInstanceTasks.load(context, slave.id)
                                   for slave in master_instance_tasks.slaves]

                # Collect info from each slave post instance launch
                slave_detail = [slave_instance.get_replication_detail()
                                for slave_instance in slave_instances]

                # Pass info of all replication nodes to the master for
                # replication setup completion
                master_detail = master_instance_tasks.get_replication_detail()
                master_instance_tasks.complete_master_setup(slave_detail)

                # Pass info of all replication nodes to each slave for
                # replication setup completion
                for slave_instance in slave_instances:
                    slave_instance.complete_slave_setup(master_detail,
                                                        slave_detail)

                # Push pending data/transactions from master to slaves
                master_instance_tasks.sync_data_to_slaves()

                # Set the status of all slave nodes to ACTIVE
                for slave_instance in slave_instances:
                    slave_guest = remote.create_guest_client(
                        slave_instance.context, slave_instance.db_info.id,
                        slave_instance.datastore_version.manager)
                    slave_guest.cluster_complete()

        finally:
            # Remove the intermediate backup regardless of outcome.
            if replica_backup_created:
                Backup.delete(context, replica_backup_id)
Example #18
0
    def _create_replication_slave(self,
                                  context,
                                  instance_id,
                                  name,
                                  flavor,
                                  image_id,
                                  databases,
                                  users,
                                  datastore_manager,
                                  packages,
                                  volume_size,
                                  availability_zone,
                                  root_password,
                                  nics,
                                  overrides,
                                  slave_of_id,
                                  backup_id,
                                  volume_type,
                                  modules,
                                  access=None,
                                  ds_version=None):
        """Create one or more replicas of the master ``slave_of_id``.

        ``instance_id`` and ``root_password`` may be scalars (single
        replica) or parallel lists (bulk create).  A snapshot of the master
        is taken once and reused for all replicas; the backup it produced is
        deleted on exit.

        :raises: re-raises any failure taking the master snapshot or
            creating a replica.
        """
        # Normalize scalar arguments to parallel lists.
        if isinstance(instance_id, list):
            ids = instance_id
            root_passwords = root_password
        else:
            ids = [instance_id]
            root_passwords = [root_password]
        replica_number = 0
        replica_backup_id = backup_id
        replicas = []

        # Place replicas according to the master's server group (locality).
        master_instance_tasks = BuiltInstanceTasks.load(context, slave_of_id)
        server_group = master_instance_tasks.server_group
        scheduler_hints = srv_grp.ServerGroup.convert_to_hint(server_group)
        LOG.debug("Using scheduler hints %s for creating instance %s",
                  scheduler_hints, instance_id)

        # Create backup for master
        snapshot = None
        try:
            instance_tasks = FreshInstanceTasks.load(context, ids[0])
            snapshot = instance_tasks.get_replication_master_snapshot(
                context,
                slave_of_id,
                flavor,
                parent_backup_id=replica_backup_id)
            LOG.info('Snapshot info for creating replica of %s: %s',
                     slave_of_id, snapshot)
        except Exception as err:
            LOG.error(
                'Failed to get master snapshot info for creating '
                'replica, error: %s', str(err))

            # Clean up a partially-created backup before propagating.
            if snapshot and snapshot.get('dataset', {}).get('snapshot_id'):
                backup_id = snapshot['dataset']['snapshot_id']
                Backup.delete(context, backup_id)

            raise

        # Create replicas using the master backup
        replica_backup_id = snapshot['dataset']['snapshot_id']
        try:
            for replica_index in range(len(ids)):
                replica_number += 1
                LOG.info(f"Creating replica {replica_number} "
                         f"({ids[replica_index]}) of {len(ids)}.")

                instance_tasks = FreshInstanceTasks.load(
                    context, ids[replica_index])
                instance_tasks.create_instance(flavor,
                                               image_id,
                                               databases,
                                               users,
                                               datastore_manager,
                                               packages,
                                               volume_size,
                                               replica_backup_id,
                                               availability_zone,
                                               root_passwords[replica_index],
                                               nics,
                                               overrides,
                                               None,
                                               snapshot,
                                               volume_type,
                                               modules,
                                               scheduler_hints,
                                               access=access,
                                               ds_version=ds_version)
                replicas.append(instance_tasks)

            for replica in replicas:
                replica.wait_for_instance(CONF.restore_usage_timeout, flavor)
                LOG.info('Replica %s created successfully', replica.id)
        except Exception as err:
            LOG.error('Failed to create replica from %s, error: %s',
                      slave_of_id, str(err))
            raise
        finally:
            # The master backup is only needed while creating replicas.
            # snapshot_id may be falsy (the error path above checks for
            # this), so guard against deleting a nonexistent backup.
            if replica_backup_id:
                Backup.delete(context, replica_backup_id)