Example #1
        def _grow_cluster():

            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            # Default to None so the guard below raises a clear error
            # instead of leaking an unhandled StopIteration.
            cluster_head = next(
                (Instance.load(context, db_inst.id)
                 for db_inst in db_instances
                 if db_inst.id not in new_instance_ids), None)
            if not cluster_head:
                raise TroveError(
                    _("Unable to determine existing Redis cluster"
                      " member"))

            (cluster_head_ip,
             cluster_head_port) = (self.get_guest(cluster_head).get_node_ip())

            # Wait for cluster members to get to cluster-ready status.
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                return

            LOG.debug("All members ready, proceeding for cluster setup.")
            new_insts = [
                Instance.load(context, instance_id)
                for instance_id in new_instance_ids
            ]
            # Build a list rather than a map() iterator: the guests are
            # iterated twice below and an iterator would be exhausted.
            new_guests = [self.get_guest(inst) for inst in new_insts]

            # Connect nodes to the cluster head
            for guest in new_guests:
                guest.cluster_meet(cluster_head_ip, cluster_head_port)

            for guest in new_guests:
                guest.cluster_complete()
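
A note on the cluster_meet call used above: on the guest side it comes down
to Redis's CLUSTER MEET command, which introduces a new node to an existing
cluster member. A minimal sketch of that step with a plain redis-py client
(the connection details and helper name are illustrative assumptions, not
Trove's actual guest-agent code):

import redis

def cluster_meet(node_host, cluster_head_ip, cluster_head_port):
    # Connect to the joining node and tell it to "meet" the existing
    # cluster head (Redis CLUSTER MEET <ip> <port>).
    client = redis.Redis(host=node_host, port=6379)
    client.execute_command('CLUSTER MEET', cluster_head_ip,
                           cluster_head_port)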
Example #2
        def _add_shard_cluster():

            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False,
                                               shard_id=shard_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]
            LOG.debug("instances in shard %(shard_id)s: %(instance_ids)s",
                      {'shard_id': shard_id, 'instance_ids': instance_ids})
            if not self._all_instances_ready(instance_ids, cluster_id,
                                             shard_id):
                return

            members = [Instance.load(context, instance_id)
                       for instance_id in instance_ids]

            db_query_routers = DBInstance.find_all(cluster_id=cluster_id,
                                                   type='query_router',
                                                   deleted=False).all()
            query_routers = [Instance.load(context, db_query_router.id)
                             for db_query_router in db_query_routers]

            if not self._create_shard(query_routers[0], members):
                return

            for member in members:
                self.get_guest(member).cluster_complete()
Example #3
        def _grow_cluster():

            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            # Default to None so the guard below raises a clear error
            # instead of leaking an unhandled StopIteration.
            cluster_head = next((Instance.load(context, db_inst.id)
                                 for db_inst in db_instances
                                 if db_inst.id not in new_instance_ids),
                                None)
            if not cluster_head:
                raise TroveError("Unable to determine existing Redis cluster "
                                 "member")

            (cluster_head_ip, cluster_head_port) = (
                self.get_guest(cluster_head).get_node_ip())

            # Wait for cluster members to get to cluster-ready status.
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                return

            LOG.debug("All members ready, proceeding for cluster setup.")
            new_insts = [Instance.load(context, instance_id)
                         for instance_id in new_instance_ids]
            # Build a list rather than a map() iterator: the guests are
            # iterated twice below and an iterator would be exhausted.
            new_guests = [self.get_guest(inst) for inst in new_insts]

            # Connect nodes to the cluster head
            for guest in new_guests:
                guest.cluster_meet(cluster_head_ip, cluster_head_port)

            for guest in new_guests:
                guest.cluster_complete()
Example #4
        def _add_shard_cluster():

            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               shard_id=shard_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]
            LOG.debug("instances in shard %s: %s" % (shard_id, instance_ids))
            if not self._all_instances_ready(instance_ids, cluster_id,
                                             shard_id):
                return

            members = [
                Instance.load(context, instance_id)
                for instance_id in instance_ids
            ]

            if not self._create_replica_set(members, cluster_id, shard_id):
                return

            db_query_routers = DBInstance.find_all(cluster_id=cluster_id,
                                                   type='query_router',
                                                   deleted=False).all()
            query_routers = [
                Instance.load(context, db_query_router.id)
                for db_query_router in db_query_routers
            ]

            if not self._create_shard(query_routers, replica_set_name, members,
                                      cluster_id, shard_id):
                return

            for member in members:
                self.get_guest(member).cluster_complete()
Example #5
        def _shrink_cluster():
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()

            all_instance_ids = [db_instance.id for db_instance in db_instances]

            remove_instances = [Instance.load(context, instance_id)
                                for instance_id in instance_ids]

            left_instances = [Instance.load(context, instance_id)
                              for instance_id
                              in all_instance_ids
                              if instance_id not in instance_ids]

            remove_member_ips = [self.get_ip(instance)
                                 for instance in remove_instances]

            k = VerticaCluster.k_safety(len(left_instances))

            for db_instance in db_instances:
                if db_instance['type'] == 'master':
                    master_instance = Instance.load(context,
                                                    db_instance.id)
                    if self.get_ip(master_instance) in remove_member_ips:
                        raise RuntimeError(_("Cannot remove master instance!"))
                    LOG.debug(_("Marking cluster k-safety: %s") % k)
                    self.get_guest(master_instance).mark_design_ksafe(k)
                    self.get_guest(master_instance).shrink_cluster(
                        remove_member_ips)
                    break

            for r in remove_instances:
                Instance.delete(r)
Example #6
        def _add_shard_cluster():

            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               shard_id=shard_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]
            LOG.debug("instances in shard %s: %s" % (shard_id,
                                                     instance_ids))
            if not self._all_instances_ready(instance_ids, cluster_id,
                                             shard_id):
                return

            members = [Instance.load(context, instance_id)
                       for instance_id in instance_ids]

            db_query_routers = DBInstance.find_all(cluster_id=cluster_id,
                                                   type='query_router',
                                                   deleted=False).all()
            query_routers = [Instance.load(context, db_query_router.id)
                             for db_query_router in db_query_routers]

            if not self._create_shard(query_routers[0], members):
                return

            for member in members:
                self.get_guest(member).cluster_complete()
Example #7
        def _shrink_cluster():
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()

            all_instance_ids = [db_instance.id for db_instance in db_instances]

            remove_instances = [Instance.load(context, instance_id)
                                for instance_id in instance_ids]

            left_instances = [Instance.load(context, instance_id)
                              for instance_id
                              in all_instance_ids
                              if instance_id not in instance_ids]

            remove_member_ips = [self.get_ip(instance)
                                 for instance in remove_instances]

            k = VerticaCluster.k_safety(len(left_instances))

            for db_instance in db_instances:
                if db_instance['type'] == 'master':
                    master_instance = Instance.load(context,
                                                    db_instance.id)
                    if self.get_ip(master_instance) in remove_member_ips:
                        raise RuntimeError(_("Cannot remove master instance!"))
                    LOG.debug("Marking cluster k-safety: %s", k)
                    self.get_guest(master_instance).mark_design_ksafe(k)
                    self.get_guest(master_instance).shrink_cluster(
                        remove_member_ips)
                    break

            for r in remove_instances:
                Instance.delete(r)
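
VerticaCluster.k_safety, referenced above, maps the number of surviving
nodes to Vertica's k-safety level (how many node failures the design
tolerates). A sketch of one plausible implementation; the exact thresholds
in Trove may differ, but Vertica's documentation requires at least 3 nodes
for k-safety 1 and at least 5 for k-safety 2:

def k_safety(number_nodes):
    # Assumed thresholds based on Vertica's documented minimums;
    # treat this as illustrative, not Trove's actual helper.
    if number_nodes < 3:
        return 0
    if number_nodes < 5:
        return 1
    return 2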
Example #8
 def _grow_cluster():
     new_instances = [db_instance for db_instance in self.db_instances
                      if db_instance.id in instance_ids]
     new_members = [db_instance for db_instance in new_instances
                    if db_instance.type == 'member']
     new_query_routers = [db_instance for db_instance in new_instances
                          if db_instance.type == 'query_router']
     instances = []
     if new_members:
         shard_ids = set([db_instance.shard_id for db_instance
                          in new_members])
         query_router_id = self._get_running_query_router_id()
         if not query_router_id:
             return
         for shard_id in shard_ids:
             LOG.debug('growing cluster by adding shard %(shard_id)s '
                       'on query router %(router_id)s',
                       {'shard_id': shard_id,
                        'router_id': query_router_id})
             member_ids = [db_instance.id for db_instance in new_members
                           if db_instance.shard_id == shard_id]
             if not self._all_instances_ready(
                 member_ids, cluster_id, shard_id
             ):
                 return
             members = [Instance.load(context, member_id)
                        for member_id in member_ids]
             query_router = Instance.load(context, query_router_id)
             if not self._create_shard(query_router, members):
                 return
             instances.extend(members)
     if new_query_routers:
         query_router_ids = [db_instance.id for db_instance
                             in new_query_routers]
         config_servers_ids = [db_instance.id for db_instance
                               in self.db_instances
                               if db_instance.type == 'config_server']
         LOG.debug('growing cluster by adding query routers '
                   '%(router)s, with config servers %(server)s',
                   {'router': query_router_ids,
                    'server': config_servers_ids})
         if not self._all_instances_ready(
             query_router_ids, cluster_id
         ):
             return
         query_routers = [Instance.load(context, instance_id)
                          for instance_id in query_router_ids]
         config_servers_ips = [
             self.get_ip(Instance.load(context, config_server_id))
             for config_server_id in config_servers_ids
         ]
         if not self._add_query_routers(
                 query_routers, config_servers_ips,
                 admin_password=self.get_cluster_admin_password(context)
         ):
             return
         instances.extend(query_routers)
     for instance in instances:
         self.get_guest(instance).cluster_complete()
Example #9
 def _grow_cluster():
     new_instances = [db_instance for db_instance in self.db_instances
                      if db_instance.id in instance_ids]
     new_members = [db_instance for db_instance in new_instances
                    if db_instance.type == 'member']
     new_query_routers = [db_instance for db_instance in new_instances
                          if db_instance.type == 'query_router']
     instances = []
     if new_members:
         shard_ids = set([db_instance.shard_id for db_instance
                          in new_members])
         query_router_id = self._get_running_query_router_id()
         if not query_router_id:
             return
         for shard_id in shard_ids:
              LOG.debug('growing cluster by adding shard %s on query '
                        'router %s', shard_id, query_router_id)
             member_ids = [db_instance.id for db_instance in new_members
                           if db_instance.shard_id == shard_id]
             if not self._all_instances_ready(
                 member_ids, cluster_id, shard_id
             ):
                 return
             members = [Instance.load(context, member_id)
                        for member_id in member_ids]
             query_router = Instance.load(context, query_router_id)
             if not self._create_shard(query_router, members):
                 return
             instances.extend(members)
     if new_query_routers:
         query_router_ids = [db_instance.id for db_instance
                             in new_query_routers]
         config_servers_ids = [db_instance.id for db_instance
                               in self.db_instances
                               if db_instance.type == 'config_server']
          LOG.debug('growing cluster by adding query routers %s, '
                    'with config servers %s',
                    query_router_ids, config_servers_ids)
         if not self._all_instances_ready(
             query_router_ids, cluster_id
         ):
             return
         query_routers = [Instance.load(context, instance_id)
                          for instance_id in query_router_ids]
         config_servers_ips = [
             self.get_ip(Instance.load(context, config_server_id))
             for config_server_id in config_servers_ids
         ]
         if not self._add_query_routers(
                 query_routers, config_servers_ips,
                 admin_password=self.get_cluster_admin_password(context)
         ):
             return
         instances.extend(query_routers)
     for instance in instances:
         self.get_guest(instance).cluster_complete()
Example #10
        def _shrink_cluster():
            removal_instances = [
                Instance.load(context, instance_id)
                for instance_id in removal_instance_ids
            ]
            for instance in removal_instances:
                Instance.delete(instance)

            # wait for instances to be deleted
            def all_instances_marked_deleted():
                non_deleted_instances = DBInstance.find_all(
                    cluster_id=cluster_id, deleted=False).all()
                non_deleted_ids = [
                    db_instance.id for db_instance in non_deleted_instances
                ]
                return not bool(
                    set(removal_instance_ids).intersection(
                        set(non_deleted_ids)))

            try:
                LOG.info("Deleting instances (%s)", removal_instance_ids)
                utils.poll_until(all_instances_marked_deleted,
                                 sleep_time=2,
                                 time_out=CONF.cluster_delete_time_out)
            except PollTimeOut:
                LOG.error("timeout for instances to be marked as deleted.")
                return

            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()
            leftover_instances = [
                Instance.load(context, db_inst.id) for db_inst in db_instances
                if db_inst.id not in removal_instance_ids
            ]
            leftover_cluster_ips = [
                self.get_ip(instance) for instance in leftover_instances
            ]

            # Get config changes for left over instances
            rnd_cluster_guest = self.get_guest(leftover_instances[0])
            cluster_context = rnd_cluster_guest.get_cluster_context()

            # apply the new config to all leftover instances
            for instance in leftover_instances:
                guest = self.get_guest(instance)
                # render the conf.d/cluster.cnf configuration
                cluster_configuration = self._render_cluster_config(
                    context, instance, ",".join(leftover_cluster_ips),
                    cluster_context['cluster_name'],
                    cluster_context['replication_user'])
                guest.write_cluster_configuration_overrides(
                    cluster_configuration)
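
_render_cluster_config is not shown in this example. For a Galera-based
datastore the important piece of the rendered overrides is a
wsrep_cluster_address line enumerating the surviving members. A hypothetical
sketch of that idea (the template below is an assumption; only the
wsrep_cluster_name/wsrep_cluster_address option names come from Galera
itself):

def render_cluster_config(cluster_ips, cluster_name):
    # cluster_ips is the comma-joined list of leftover member IPs,
    # matching the ",".join(leftover_cluster_ips) argument above.
    return ("[mysqld]\n"
            "wsrep_cluster_name=%s\n"
            "wsrep_cluster_address=gcomm://%s\n" % (cluster_name,
                                                    cluster_ips))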
Example #11
        def _create_cluster():

            # Fetch instances by cluster_id against instances table.
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()
            instance_ids = [db_instance.id for db_instance in db_instances]

            # Wait for cluster members to get to cluster-ready status.
            if not self._all_instances_ready(instance_ids, cluster_id):
                return

            LOG.debug("All members ready, proceeding for cluster setup.")
            instances = [
                Instance.load(context, instance_id)
                for instance_id in instance_ids
            ]

            member_ips = [self.get_ip(instance) for instance in instances]
            guests = [self.get_guest(instance) for instance in instances]

            # Users to be configured for password-less SSH.
            authorized_users_without_password = ['root', 'dbadmin']

            # Configuring password-less SSH for cluster members.
            # Strategy for setting up SSH:
            # get public keys for user from member-instances in cluster,
            # combine them, finally push it back to all instances,
            # and member instances add them to authorized keys.
            LOG.debug("Configuring password-less SSH on cluster members.")
            try:
                for user in authorized_users_without_password:
                    pub_key = [guest.get_public_keys(user) for guest in guests]
                    for guest in guests:
                        guest.authorize_public_keys(user, pub_key)

                LOG.debug("Installing cluster with members: %s." % member_ips)
                for db_instance in db_instances:
                    if db_instance['type'] == 'master':
                        master_instance = Instance.load(
                            context, db_instance.id)
                        self.get_guest(master_instance).install_cluster(
                            member_ips)
                        break

                LOG.debug("Finalizing cluster configuration.")
                for guest in guests:
                    guest.cluster_complete()
            except Exception:
                LOG.exception(_("Error creating cluster."))
                self.update_statuses_on_failure(cluster_id)
Example #12
        def _create_cluster():

            # Fetch instances by cluster_id against instances table.
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()
            instance_ids = [db_instance.id for db_instance in db_instances]

            # Wait for cluster members to get to cluster-ready status.
            if not self._all_instances_ready(instance_ids, cluster_id):
                return

            LOG.debug("All members ready, proceeding for cluster setup.")
            instances = [Instance.load(context, instance_id) for instance_id
                         in instance_ids]

            member_ips = [self.get_ip(instance) for instance in instances]
            guests = [self.get_guest(instance) for instance in instances]

            # Users to be configured for password-less SSH.
            authorized_users_without_password = ['root', 'dbadmin']

            # Configuring password-less SSH for cluster members.
            # Strategy for setting up SSH:
            # get public keys for user from member-instances in cluster,
            # combine them, finally push it back to all instances,
            # and member instances add them to authorized keys.
            LOG.debug("Configuring password-less SSH on cluster members.")
            try:
                for user in authorized_users_without_password:
                    pub_key = [guest.get_public_keys(user) for guest in guests]
                    for guest in guests:
                        guest.authorize_public_keys(user, pub_key)

                LOG.debug("Installing cluster with members: %s." % member_ips)
                for db_instance in db_instances:
                    if db_instance['type'] == 'master':
                        master_instance = Instance.load(context,
                                                        db_instance.id)
                        self.get_guest(master_instance).install_cluster(
                            member_ips)
                        break

                LOG.debug("Finalizing cluster configuration.")
                for guest in guests:
                    guest.cluster_complete()
            except Exception:
                LOG.exception(_("Error creating cluster."))
                self.update_statuses_on_failure(cluster_id)
Example #13
        def _shrink_cluster():
            removal_instances = [Instance.load(context, instance_id)
                                 for instance_id in removal_instance_ids]
            for instance in removal_instances:
                Instance.delete(instance)

            # wait for instances to be deleted
            def all_instances_marked_deleted():
                non_deleted_instances = DBInstance.find_all(
                    cluster_id=cluster_id, deleted=False).all()
                non_deleted_ids = [db_instance.id for db_instance
                                   in non_deleted_instances]
                return not bool(
                    set(removal_instance_ids).intersection(
                        set(non_deleted_ids))
                )
            try:
                LOG.info(_("Deleting instances (%s)") % removal_instance_ids)
                utils.poll_until(all_instances_marked_deleted,
                                 sleep_time=2,
                                 time_out=CONF.cluster_delete_time_out)
            except PollTimeOut:
                LOG.error(_("timeout for instances to be marked as deleted."))
                return

            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()
            leftover_instances = [Instance.load(context, db_inst.id)
                                  for db_inst in db_instances
                                  if db_inst.id not in removal_instance_ids]
            leftover_cluster_ips = [self.get_ip(instance) for instance in
                                    leftover_instances]

            # Get config changes for left over instances
            rnd_cluster_guest = self.get_guest(leftover_instances[0])
            cluster_context = rnd_cluster_guest.get_cluster_context()

            # apply the new config to all leftover instances
            for instance in leftover_instances:
                guest = self.get_guest(instance)
                # render the conf.d/cluster.cnf configuration
                cluster_configuration = self._render_cluster_config(
                    context,
                    instance,
                    ",".join(leftover_cluster_ips),
                    cluster_context['cluster_name'],
                    cluster_context['replication_user'])
                guest.write_cluster_configuration_overrides(
                    cluster_configuration)
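
utils.poll_until, used for the deletion wait above, just re-evaluates a
predicate on a fixed interval until it returns true or the timeout elapses.
A self-contained sketch of that polling pattern (PollTimeOut here stands in
for Trove's own exception class):

import time

class PollTimeOut(Exception):
    pass

def poll_until(predicate, sleep_time=2, time_out=60):
    # Re-check the predicate every sleep_time seconds; give up with
    # PollTimeOut once time_out seconds have passed.
    deadline = time.time() + time_out
    while time.time() < deadline:
        if predicate():
            return
        time.sleep(sleep_time)
    raise PollTimeOut()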
Example #14
        def _grow_cluster():
            # Wait for new nodes to get to cluster-ready status.
            LOG.debug("Waiting for new nodes to become ready.")
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                return

            new_instances = [Instance.load(context, instance_id)
                             for instance_id in new_instance_ids]
            added_nodes = [self.build_node_info(instance)
                           for instance in new_instances]

            LOG.debug("All nodes ready, proceeding with cluster setup.")

            cluster_node_ids = self.find_cluster_node_ids(cluster_id)
            cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

            # Rebalance the cluster via one of the existing nodes.
            # Clients can continue to store and retrieve information and
            # do not need to be aware that a rebalance operation is taking
            # place.
            # The new nodes are marked active only if the rebalancing
            # completes.
            try:
                coordinator = cluster_nodes[0]
                self._add_nodes(coordinator, added_nodes)
                LOG.debug("Cluster configuration finished successfully.")
            except Exception:
                LOG.exception(_("Error growing cluster."))
                self.update_statuses_on_failure(cluster_id)
Example #15
File: api.py Project: no2a/trove
    def shrink(self, instances):
        """Removes instances from a cluster."""
        LOG.debug("Shrinking cluster %s.", self.id)

        self.validate_cluster_available()
        removal_instances = [
            Instance.load(self.context, inst_id) for inst_id in instances
        ]
        db_instances = DBInstance.find_all(cluster_id=self.db_info.id,
                                           deleted=False).all()
        if len(db_instances) - len(removal_instances) < 1:
            raise exception.ClusterShrinkMustNotLeaveClusterEmpty()

        self.db_info.update(task_status=ClusterTasks.SHRINKING_CLUSTER)
        try:
            task_api.load(self.context,
                          self.ds_version.manager).shrink_cluster(
                              self.db_info.id,
                              [instance.id for instance in removal_instances])
        except Exception:
            self.db_info.update(task_status=ClusterTasks.NONE)
            raise

        return self.__class__(self.context, self.db_info, self.ds,
                              self.ds_version)
Example #16
        def _grow_cluster():
            # Wait for new nodes to get to cluster-ready status.
            LOG.debug("Waiting for new nodes to become ready.")
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                return

            new_instances = [Instance.load(context, instance_id)
                             for instance_id in new_instance_ids]
            add_node_info = [self.build_node_info(instance)
                             for instance in new_instances]

            LOG.debug("All nodes ready, proceeding with cluster setup.")

            cluster_node_ids = self.find_cluster_node_ids(cluster_id)
            cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

            old_node_info = [node for node in cluster_nodes
                             if node['id'] not in new_instance_ids]

            # Rebalance the cluster via one of the existing nodes.
            # Clients can continue to store and retrieve information and
            # do not need to be aware that a rebalance operation is taking
            # place.
            coordinator = old_node_info[0]
            self._add_nodes(coordinator, add_node_info)
            LOG.debug("Cluster grow finished successfully.")
Example #17
        def _create_cluster():

            # fetch instances by cluster_id against instances table
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]
            LOG.debug("instances in cluster %(cluster_id)s: %(instance_ids)s",
                      {'cluster_id': cluster_id, 'instance_ids': instance_ids})

            if not self._all_instances_ready(instance_ids, cluster_id):
                return

            LOG.debug("all instances in cluster %s ready.", cluster_id)

            instances = [Instance.load(context, instance_id) for instance_id
                         in instance_ids]

            # filter tidb_server instances into a new list: tidb_server
            tidb_server = [instance for instance in instances if
                           instance.type == 'tidb_server']
            LOG.debug("tidb_server: %s",
                      [instance.id for instance in tidb_server])
            # filter pd_server instances into a new list: pd_server
            pd_server = [instance for instance in instances if
                         instance.type == 'pd_server']
            LOG.debug("pd_server: %s",
                      [instance.id for instance in pd_server])
            # filter tikv instances into a new list: tikv
            tikv = [instance for instance in instances if
                    instance.type == 'tikv']
            LOG.debug("tikv: %s",
                      [instance.id for instance in tikv])
Example #18
        def _create_resources():
            # parse the ID from the Ref
            instance_id = utils.get_id_from_href(instance)

            # verify that the instance exists and can perform actions
            from trove.instance.models import Instance
            instance_model = Instance.load(context, instance_id)
            instance_model.validate_can_perform_action()

            cls.verify_swift_auth_token(context)

            try:
                db_info = DBBackup.create(name=name,
                                          description=description,
                                          tenant_id=context.tenant,
                                          state=BackupState.NEW,
                                          instance_id=instance_id,
                                          deleted=False)
            except exception.InvalidModelError as ex:
                LOG.exception("Unable to create Backup record:")
                raise exception.BackupCreationError(str(ex))

            backup_info = {'id': db_info.id,
                           'name': name,
                           'description': description,
                           'instance_id': instance_id,
                           'backup_type': db_info.backup_type,
                           'checksum': db_info.checksum,
                           }
            api.API(context).create_backup(backup_info, instance_id)
            return db_info
Example #19
        def _grow_cluster():
            # Wait for new nodes to get to cluster-ready status.
            LOG.debug("Waiting for new nodes to become ready.")
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                return

            new_instances = [
                Instance.load(context, instance_id)
                for instance_id in new_instance_ids
            ]
            added_nodes = [
                self.build_node_info(instance) for instance in new_instances
            ]

            LOG.debug("All nodes ready, proceeding with cluster setup.")

            cluster_node_ids = self.find_cluster_node_ids(cluster_id)
            cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

            # Rebalance the cluster via one of the existing nodes.
            # Clients can continue to store and retrieve information and
            # do not need to be aware that a rebalance operation is taking
            # place.
            # The new nodes are marked active only if the rebalancing
            # completes.
            try:
                coordinator = cluster_nodes[0]
                self._add_nodes(coordinator, added_nodes)
                LOG.debug("Cluster configuration finished successfully.")
            except Exception:
                LOG.exception(_("Error growing cluster."))
                self.update_statuses_on_failure(cluster_id)
Example #20
        def _create_cluster():

            # fetch instances by cluster_id against instances table
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]
            LOG.debug("instances in cluster %s: %s" % (cluster_id,
                                                       instance_ids))

            if not self._all_instances_ready(instance_ids, cluster_id):
                return

            instances = [Instance.load(context, instance_id) for instance_id
                         in instance_ids]

            # filter query routers in instances into a new list: query_routers
            query_routers = [instance for instance in instances if
                             instance.type == 'query_router']
            LOG.debug("query routers: %s" %
                      [instance.id for instance in query_routers])
            # filter config servers in instances into new list: config_servers
            config_servers = [instance for instance in instances if
                              instance.type == 'config_server']
            LOG.debug("config servers: %s" %
                      [instance.id for instance in config_servers])
            # filter members (non router/configsvr) into a new list: members
            members = [instance for instance in instances if
                       instance.type == 'member']
            LOG.debug("members: %s" %
                      [instance.id for instance in members])

            # for config_server in config_servers, append ip/hostname to
            # "config_server_hosts", then
            # peel off the replica-set name and ip/hostname from 'x'
            config_server_ips = [self.get_ip(instance)
                                 for instance in config_servers]
            LOG.debug("config server ips: %s" % config_server_ips)

            LOG.debug("calling add_config_servers on query_routers")
            try:
                for query_router in query_routers:
                    (self.get_guest(query_router)
                     .add_config_servers(config_server_ips))
            except Exception:
                LOG.exception(_("error adding config servers"))
                self.update_statuses_on_failure(cluster_id)
                return

            if not self._create_replica_set(members, cluster_id):
                return

            replica_set_name = "rs1"
            if not self._create_shard(query_routers, replica_set_name,
                                      members, cluster_id):
                return
            # call to start checking status
            for instance in instances:
                self.get_guest(instance).cluster_complete()
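
_create_shard is defined elsewhere in the strategy. Against a query router
(mongos), registering the shard comes down to MongoDB's addShard command
with a "replicaSet/host:port,..." member string. A hedged pymongo sketch of
that step (host and port values are placeholders):

from pymongo import MongoClient

def add_shard(query_router_ip, replica_set_name, member_ips):
    # Run addShard on the mongos, passing the replica set name plus
    # its members, e.g. "rs1/10.0.0.2:27017,10.0.0.3:27017".
    client = MongoClient(query_router_ip, 27017)
    shard_spec = '%s/%s' % (replica_set_name,
                            ','.join('%s:27017' % ip
                                     for ip in member_ips))
    return client.admin.command('addShard', shard_spec)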
Example #21
        def _create_cluster():

            # fetch instances by cluster_id against instances table
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]
            LOG.debug("instances in cluster %s: %s" % (cluster_id,
                                                       instance_ids))

            if not self._all_instances_ready(instance_ids, cluster_id):
                return

            instances = [Instance.load(context, instance_id) for instance_id
                         in instance_ids]

            # filter query routers in instances into a new list: query_routers
            query_routers = [instance for instance in instances if
                             instance.type == 'query_router']
            LOG.debug("query routers: %s" %
                      [instance.id for instance in query_routers])
            # filter config servers in instances into new list: config_servers
            config_servers = [instance for instance in instances if
                              instance.type == 'config_server']
            LOG.debug("config servers: %s" %
                      [instance.id for instance in config_servers])
            # filter members (non router/configsvr) into a new list: members
            members = [instance for instance in instances if
                       instance.type == 'member']
            LOG.debug("members: %s" %
                      [instance.id for instance in members])

            # for config_server in config_servers, append ip/hostname to
            # "config_server_hosts", then
            # peel off the replica-set name and ip/hostname from 'x'
            config_server_ips = [self.get_ip(instance)
                                 for instance in config_servers]
            LOG.debug("config server ips: %s" % config_server_ips)

            LOG.debug("calling add_config_servers on query_routers")
            try:
                for query_router in query_routers:
                    (self.get_guest(query_router)
                     .add_config_servers(config_server_ips))
            except Exception:
                LOG.exception(_("error adding config servers"))
                self.update_statuses_on_failure(cluster_id)
                return

            if not self._create_replica_set(members, cluster_id):
                return

            replica_set_name = "rs1"
            if not self._create_shard(query_routers, replica_set_name,
                                      members, cluster_id):
                return
            # call to start checking status
            for instance in instances:
                self.get_guest(instance).cluster_complete()
Example #22
        def _grow_cluster():
            LOG.debug("begin grow_cluster for Vertica cluster %s" % cluster_id)

            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()

            instance_ids = [db_instance.id for db_instance in db_instances]

            # Wait for new cluster members to get to cluster-ready status.
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                return

            new_insts = [
                Instance.load(context, instance_id)
                for instance_id in new_instance_ids
            ]

            existing_instances = [
                Instance.load(context, instance_id)
                for instance_id in instance_ids
                if instance_id not in new_instance_ids
            ]

            existing_guests = [self.get_guest(i) for i in existing_instances]
            new_guests = [self.get_guest(i) for i in new_insts]
            all_guests = new_guests + existing_guests

            authorized_users_without_password = ['root', 'dbadmin']
            new_ips = [self.get_ip(instance) for instance in new_insts]

            for user in authorized_users_without_password:
                pub_key = [guest.get_public_keys(user) for guest in all_guests]
                for guest in all_guests:
                    guest.authorize_public_keys(user, pub_key)

            for db_instance in db_instances:
                if db_instance['type'] == 'master':
                    LOG.debug("Found 'master' instance, calling grow on guest")
                    master_instance = Instance.load(context, db_instance.id)
                    self.get_guest(master_instance).grow_cluster(new_ips)
                    break

            for guest in new_guests:
                guest.cluster_complete()
Example #23
        def _create_cluster():

            # fetch instances by cluster_id against instances table
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]
            LOG.debug("instances in cluster %s: %s" %
                      (cluster_id, instance_ids))

            if not self._all_instances_ready(instance_ids, cluster_id):
                return

            LOG.debug("all instances in cluster %s ready." % cluster_id)

            instances = [
                Instance.load(context, instance_id)
                for instance_id in instance_ids
            ]

            # filter query routers in instances into a new list: query_routers
            query_routers = [
                instance for instance in instances
                if instance.type == 'query_router'
            ]
            LOG.debug("query routers: %s" %
                      [instance.id for instance in query_routers])
            # filter config servers in instances into new list: config_servers
            config_servers = [
                instance for instance in instances
                if instance.type == 'config_server'
            ]
            LOG.debug("config servers: %s" %
                      [instance.id for instance in config_servers])
            # filter members (non router/configsvr) into a new list: members
            members = [
                instance for instance in instances if instance.type == 'member'
            ]
            LOG.debug("members: %s" % [instance.id for instance in members])

            # for config_server in config_servers, append ip/hostname to
            # "config_server_hosts", then
            # peel off the replica-set name and ip/hostname from 'x'
            config_server_ips = [
                self.get_ip(instance) for instance in config_servers
            ]
            LOG.debug("config server ips: %s" % config_server_ips)

            if not self._add_query_routers(query_routers, config_server_ips):
                return

            if not self._create_shard(query_routers[0], members):
                return

            # call to start checking status
            for instance in instances:
                self.get_guest(instance).cluster_complete()
Example #24
    def grow_cluster(self, context, cluster_id, new_instance_ids):
        """Grow a K2hdkc Cluster."""
        LOG.debug("Begins grow_cluster for %s. new_instance_ids: %s",
                  cluster_id, new_instance_ids)

        # 1. validates args
        if context is None:
            LOG.error("no context")
            return
        if cluster_id is None:
            LOG.error("no cluster_id")
            return
        if new_instance_ids is None:
            LOG.error("no new_instance_ids")
            return

        timeout = Timeout(CONF.cluster_usage_timeout)
        try:
            # 2. Retrieves db_instances from the database
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()
            LOG.debug("len(db_instances) {}".format(len(db_instances)))
            # 3. Checks if new instances are ready
            if not self._all_instances_running(new_instance_ids, cluster_id):
                LOG.error("instances are not ready yet")
                return
            # 4. Loads instances
            instances = [
                Instance.load(context, instance_id)
                for instance_id in new_instance_ids
            ]
            LOG.debug("len(instances) {}".format(len(instances)))

            # 5. Instances GuestAgent class
            # 6. Calls cluster_complete endpoint of K2hdkcGuestAgent
            LOG.debug("Calling cluster_complete as a final hook to each "
                      "node in the cluster.")
            for instance in instances:
                self.get_guest(instance).cluster_complete()
            # 7. reset the current cluster task status to None
            LOG.debug("reset cluster task to None")
            self.reset_task()
        except Timeout as t:
            # Note: administrators should reset the task via CLI in this case.
            if t is not timeout:
                raise  # not my timeout
            LOG.exception("Timeout for growing cluster.")
            self.update_statuses_on_failure(
                cluster_id, status=inst_tasks.InstanceTasks.GROWING_ERROR)
        finally:
            timeout.cancel()

        LOG.debug("Completed grow_cluster for %s.", cluster_id)
Example #25
        def _grow_cluster():
            LOG.debug("begin grow_cluster for Vertica cluster %s" % cluster_id)

            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()

            instance_ids = [db_instance.id for db_instance in db_instances]

            # Wait for new cluster members to get to cluster-ready status.
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                return

            new_insts = [Instance.load(context, instance_id)
                         for instance_id in new_instance_ids]

            existing_instances = [Instance.load(context, instance_id)
                                  for instance_id
                                  in instance_ids
                                  if instance_id not in new_instance_ids]

            existing_guests = [self.get_guest(i) for i in existing_instances]
            new_guests = [self.get_guest(i) for i in new_insts]
            all_guests = new_guests + existing_guests

            authorized_users_without_password = ['root', 'dbadmin']
            new_ips = [self.get_ip(instance) for instance in new_insts]

            for user in authorized_users_without_password:
                pub_key = [guest.get_public_keys(user) for guest in all_guests]
                for guest in all_guests:
                    guest.authorize_public_keys(user, pub_key)

            for db_instance in db_instances:
                if db_instance['type'] == 'master':
                    LOG.debug("Found 'master' instance, calling grow on guest")
                    master_instance = Instance.load(context,
                                                    db_instance.id)
                    self.get_guest(master_instance).grow_cluster(new_ips)
                    break

            for guest in new_guests:
                guest.cluster_complete()
Example #26
        def _create_resources():
            # parse the ID from the Ref
            instance_id = utils.get_id_from_href(instance)

            # verify that the instance exists and can perform actions
            from trove.instance.models import Instance
            instance_model = Instance.load(context, instance_id)
            instance_model.validate_can_perform_action()
            cls.validate_can_perform_action(instance_model, 'backup_create')
            cls.verify_swift_auth_token(context)
            if instance_model.cluster_id is not None:
                raise exception.ClusterInstanceOperationNotSupported()

            ds = instance_model.datastore
            ds_version = instance_model.datastore_version
            parent = None
            if parent_id:
                # Look up the parent info or fail early if not found or if
                # the user does not have access to the parent.
                _parent = cls.get_by_id(context, parent_id)
                parent = {
                    'location': _parent.location,
                    'checksum': _parent.checksum,
                }
            try:
                db_info = DBBackup.create(name=name,
                                          description=description,
                                          tenant_id=context.tenant,
                                          state=BackupState.NEW,
                                          instance_id=instance_id,
                                          parent_id=parent_id,
                                          datastore_version_id=ds_version.id,
                                          deleted=False)
            except exception.InvalidModelError as ex:
                LOG.exception(
                    _("Unable to create backup record for "
                      "instance: %s"), instance_id)
                raise exception.BackupCreationError(str(ex))

            backup_info = {
                'id': db_info.id,
                'name': name,
                'description': description,
                'instance_id': instance_id,
                'backup_type': db_info.backup_type,
                'checksum': db_info.checksum,
                'parent': parent,
                'datastore': ds.name,
                'datastore_version': ds_version.name,
            }
            api.API(context).create_backup(backup_info, instance_id)
            return db_info
Example #27
        def _create_resources():
            # parse the ID from the Ref
            instance_id = utils.get_id_from_href(instance)

            # verify that the instance exists and can perform actions
            from trove.instance.models import Instance
            instance_model = Instance.load(context, instance_id)
            instance_model.validate_can_perform_action()
            cls.validate_can_perform_action(
                instance_model, 'backup_create')
            cls.verify_swift_auth_token(context)
            if instance_model.cluster_id is not None:
                raise exception.ClusterInstanceOperationNotSupported()

            ds = instance_model.datastore
            ds_version = instance_model.datastore_version
            parent = None
            if parent_id:
                # Look up the parent info or fail early if not found or if
                # the user does not have access to the parent.
                _parent = cls.get_by_id(context, parent_id)
                parent = {
                    'location': _parent.location,
                    'checksum': _parent.checksum,
                }
            try:
                db_info = DBBackup.create(name=name,
                                          description=description,
                                          tenant_id=context.tenant,
                                          state=BackupState.NEW,
                                          instance_id=instance_id,
                                          parent_id=parent_id,
                                          datastore_version_id=ds_version.id,
                                          deleted=False)
            except exception.InvalidModelError as ex:
                LOG.exception(_("Unable to create backup record for "
                                "instance: %s"), instance_id)
                raise exception.BackupCreationError(str(ex))

            backup_info = {'id': db_info.id,
                           'name': name,
                           'description': description,
                           'instance_id': instance_id,
                           'backup_type': db_info.backup_type,
                           'checksum': db_info.checksum,
                           'parent': parent,
                           'datastore': ds.name,
                           'datastore_version': ds_version.name,
                           }
            api.API(context).create_backup(backup_info, instance_id)
            return db_info
Example #28
    def create_cluster(self, context, cluster_id):
        """Create K2hdkcClusterTasks.

        This function is called in trove.taskmanager.Manager.create_cluster.
        """

        LOG.debug("Begins create_cluster for %s.", cluster_id)

        # 1. validates args
        if context is None:
            LOG.error("no context")
            return
        if cluster_id is None:
            LOG.error("no cluster_id")
            return

        timeout = Timeout(CONF.cluster_usage_timeout)
        LOG.debug("CONF.cluster_usage_timeout %s.", timeout)
        try:
            # 2. Retrieves db_instances from the database
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()
            # 3. Retrieves instance ids from the db_instances
            instance_ids = [db_instance.id for db_instance in db_instances]
            # 4. Checks if instances are ready
            if not self._all_instances_running(instance_ids, cluster_id):
                LOG.error("instances are not ready yet")
                return
            # 5. Loads instances
            instances = [
                Instance.load(context, instance_id)
                for instance_id in instance_ids
            ]
            # 6. Instantiates GuestAgent for each guest instance
            # 7. Calls cluster_complete endpoint of K2hdkcGuestAgent
            for instance in instances:
                self.get_guest(instance).cluster_complete()
            # 8. reset the current cluster task status to None
            LOG.debug("reset cluster task to None")
            self.reset_task()
        except Timeout as t:
            # Note: administrators should reset the task via CLI in this case.
            if t is not timeout:
                raise  # not my timeout
            LOG.exception("Timeout for building cluster.")
            self.update_statuses_on_failure(cluster_id)
        finally:
            timeout.cancel()

        LOG.debug("Completed create_cluster for %s.", cluster_id)
Example #29
        def _create_cluster():

            # fetch instances by cluster_id against instances table
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]
            LOG.debug("instances in cluster %s: %s" % (cluster_id,
                                                       instance_ids))

            if not self._all_instances_ready(instance_ids, cluster_id):
                return

            LOG.debug("all instances in cluster %s ready." % cluster_id)

            instances = [Instance.load(context, instance_id) for instance_id
                         in instance_ids]

            # filter query routers in instances into a new list: query_routers
            query_routers = [instance for instance in instances if
                             instance.type == 'query_router']
            LOG.debug("query routers: %s" %
                      [instance.id for instance in query_routers])
            # filter config servers in instances into new list: config_servers
            config_servers = [instance for instance in instances if
                              instance.type == 'config_server']
            LOG.debug("config servers: %s" %
                      [instance.id for instance in config_servers])
            # filter members (non router/configsvr) into a new list: members
            members = [instance for instance in instances if
                       instance.type == 'member']
            LOG.debug("members: %s" %
                      [instance.id for instance in members])

            # for config_server in config_servers, append ip/hostname to
            # "config_server_hosts", then
            # peel off the replica-set name and ip/hostname from 'x'
            config_server_ips = [self.get_ip(instance)
                                 for instance in config_servers]
            LOG.debug("config server ips: %s" % config_server_ips)

            if not self._add_query_routers(query_routers,
                                           config_server_ips):
                return

            if not self._create_shard(query_routers[0], members):
                return

            # call to start checking status
            for instance in instances:
                self.get_guest(instance).cluster_complete()
Example #30
File: api.py Project: vmazur/trove
    def shrink(self, instances):
        """Removes instances from a cluster."""
        LOG.debug("Shrinking cluster %s." % self.id)

        self.validate_cluster_available()
        removal_instances = [Instance.load(self.context, inst_id)
                             for inst_id in instances]
        db_instances = DBInstance.find_all(cluster_id=self.db_info.id,
                                           deleted=False).all()
        if len(db_instances) - len(removal_instances) < 1:
            raise exception.ClusterShrinkMustNotLeaveClusterEmpty()

        self.db_info.update(task_status=ClusterTasks.SHRINKING_CLUSTER)
        task_api.load(self.context, self.ds_version.manager).shrink_cluster(
            self.db_info.id, [instance.id for instance in removal_instances]
        )

        return PXCCluster(self.context, self.db_info, self.ds, self.ds_version)
Example #31
0
        def _create_cluster():

            # Fetch instances by cluster_id against instances table.
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]

            # Wait for cluster members to get to cluster-ready status.
            if not self._all_instances_ready(instance_ids, cluster_id):
                return

            LOG.debug("All members ready, proceeding for cluster setup.")
            instances = [
                Instance.load(context, instance_id)
                for instance_id in instance_ids
            ]

            # Connect nodes to the first node
            guests = [self.get_guest(instance) for instance in instances]
            try:
                cluster_head = instances[0]
                cluster_head_port = '6379'
                cluster_head_ip = self.get_ip(cluster_head)
                for guest in guests[1:]:
                    guest.cluster_meet(cluster_head_ip, cluster_head_port)

                num_nodes = len(instances)
                total_slots = 16384
                slots_per_node = total_slots // num_nodes  # integer division
                leftover_slots = total_slots % num_nodes
                first_slot = 0
                for guest in guests:
                    last_slot = first_slot + slots_per_node
                    if leftover_slots > 0:
                        leftover_slots -= 1
                    else:
                        last_slot -= 1
                    guest.cluster_addslots(first_slot, last_slot)
                    first_slot = last_slot + 1

                for guest in guests:
                    guest.cluster_complete()
            except Exception:
                LOG.exception(_("Error creating cluster."))
                self.update_statuses_on_failure(cluster_id)
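The slot loop above spreads Redis Cluster's fixed 16384 hash slots across the nodes as evenly as possible, with each of the first total_slots % num_nodes nodes absorbing one extra slot. A standalone sketch of the same arithmetic (the three-node call is illustrative):

    def partition_slots(num_nodes, total_slots=16384):
        """Return one contiguous (first_slot, last_slot) range per node."""
        slots_per_node = total_slots // num_nodes  # integer division
        leftover_slots = total_slots % num_nodes
        ranges, first_slot = [], 0
        for _ in range(num_nodes):
            last_slot = first_slot + slots_per_node
            if leftover_slots > 0:
                leftover_slots -= 1  # this node takes one extra slot
            else:
                last_slot -= 1
            ranges.append((first_slot, last_slot))
            first_slot = last_slot + 1
        return ranges

    # Three nodes cover all 16384 slots with no gaps or overlaps:
    print(partition_slots(3))  # [(0, 5461), (5462, 10922), (10923, 16383)]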
Example #32
0
        def _create_resources():
            # parse the ID from the Ref
            instance_id = utils.get_id_from_href(instance)

            # verify that the instance exists and can perform actions
            from trove.instance.models import Instance

            instance_model = Instance.load(context, instance_id)
            instance_model.validate_can_perform_action()

            cls.verify_swift_auth_token(context)

            parent = None
            if parent_id:
                # Look up the parent info or fail early if not found or if
                # the user does not have access to the parent.
                _parent = cls.get_by_id(context, parent_id)
                parent = {"location": _parent.location, "checksum": _parent.checksum}
            try:
                db_info = DBBackup.create(
                    name=name,
                    description=description,
                    tenant_id=context.tenant,
                    state=BackupState.NEW,
                    instance_id=instance_id,
                    parent_id=parent_id,
                    deleted=False,
                )
            except exception.InvalidModelError as ex:
                LOG.exception("Unable to create Backup record:")
                raise exception.BackupCreationError(str(ex))

            backup_info = {
                "id": db_info.id,
                "name": name,
                "description": description,
                "instance_id": instance_id,
                "backup_type": db_info.backup_type,
                "checksum": db_info.checksum,
                "parent": parent,
            }
            api.API(context).create_backup(backup_info, instance_id)
            return db_info
Example #33
0
        def _create_cluster():

            # Fetch instances by cluster_id against instances table.
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]

            # Wait for cluster members to get to cluster-ready status.
            if not self._all_instances_ready(instance_ids, cluster_id):
                return

            LOG.debug("All members ready, proceeding for cluster setup.")
            instances = [Instance.load(context, instance_id) for instance_id
                         in instance_ids]

            # Connect nodes to the first node
            guests = [self.get_guest(instance) for instance in instances]
            try:
                cluster_head = instances[0]
                cluster_head_port = '6379'
                cluster_head_ip = self.get_ip(cluster_head)
                for guest in guests[1:]:
                    guest.cluster_meet(cluster_head_ip, cluster_head_port)

                num_nodes = len(instances)
                total_slots = 16384
                slots_per_node = total_slots // num_nodes  # integer division
                leftover_slots = total_slots % num_nodes
                first_slot = 0
                for guest in guests:
                    last_slot = first_slot + slots_per_node
                    if leftover_slots > 0:
                        leftover_slots -= 1
                    else:
                        last_slot -= 1
                    guest.cluster_addslots(first_slot, last_slot)
                    first_slot = last_slot + 1

                for guest in guests:
                    guest.cluster_complete()
            except Exception:
                LOG.exception(_("Error creating cluster."))
                self.update_statuses_on_failure(cluster_id)
Example #34
0
 def load_cluster_nodes(cls, context, node_ids):
     return [
         cls.build_node_info(Instance.load(context, node_id))
         for node_id in node_ids
     ]
Example #35
0
 def get_cluster_admin_password(self, context):
     """The cluster admin's user credentials are stored on all query
     routers. Find one and get the guest to return the password.
     """
     instance = Instance.load(context, self._get_running_query_router_id())
     return self.get_guest(instance).get_admin_password()
Example #36
0
    def create(cls, context, instance_id, name, description=None, group_id=None, backup_type=None,
               expire_at=None, init=False, service_image_id=None, parent_id=None):
        if parent_id is not None:
            parent_id = str(parent_id)
            LOG.info("parent_id:%s, parent_id.len:%s", parent_id, len(parent_id.strip()))
            if len(parent_id.strip()) == 0:
                parent_id = None
        _parent_id = parent_id

        from trove.instance.models import Instance
        instance_id = utils.get_id_from_href(instance_id)
        instance_model = Instance.load(context, instance_id)
        if init:
            if instance_model.db_info.server_status != 'ACTIVE':
                msg = ("Instance is not currently available for an action to be "
                       "performed (server_status was %s).", instance_model.db_info.server_status)
                LOG.error(msg)
                raise exception.UnprocessableEntity(msg)
        else:
            instance_model.validate_can_perform_action()

        if instance_model.type == DBInstanceType.MASTER:
            try:
                standby_id = InstanceGroupItem.get_by_gid_type(context, instance_model.group_id, DBInstanceType.STANDBY).instance_id
                instance_model = Instance.load(context, standby_id)
                instance_model.validate_can_perform_action()
                instance_id = standby_id
            except Exception as e:
                LOG.error(e)
                raise e

        if group_id is None:
            raise exception.TroveError("group_id can't be None")
        if backup_type is None or backup_type not in [Type.SNAPSHOT,
                                                      Type.AUTOBACKUP]:
            raise exception.TroveError("backup_type can't be None; accepted "
                                       "values: snapshot or autobackup")

        if backup_type == Type.SNAPSHOT:
            expire_time = 0
            _parent_id = None  # force full
        elif backup_type == Type.AUTOBACKUP:
            expire_time = int(expire_at)
            if parent_id and parent_id == '0':
                _parent_id = None  # force full
            elif parent_id and parent_id != '0':
                try:
                    backup_parent = cls.get_by_id(context, parent_id)
                    LOG.debug("backup_parent:%s", backup_parent)
                except Exception:
                    raise exception.NotFound("no backup found with "
                                             "parent_id: %s" % parent_id)
                if not backup_parent:
                    raise exception.NotFound("no backup found with "
                                             "parent_id: %s" % parent_id)
            elif parent_id is None:
                LOG.debug("parent_id is None:%s", parent_id)
                last_backup_chain = cls.get_last_backup_chain(group_id)
                backup_incremental_chain_size = CONF.backup_incremental_chain_size
                LOG.info("last_backup_chain: %s, backup_incremental_chain_size: %s",
                         last_backup_chain, backup_incremental_chain_size)
                if len(last_backup_chain) == 0 \
                        or len(last_backup_chain) >= int(backup_incremental_chain_size):
                    _parent_id = None  # create full
                else:
                    compare_instance = None
                    try:
                        compare_instance = InstanceGroupItem.get_by_gid_type(context, group_id, DBInstanceType.STANDBY)
                    except exception.NotFound:  # not has standby
                        try:
                            compare_instance = InstanceGroupItem.get_by_gid_type(context, group_id,
                                                                                 DBInstanceType.SINGLE)
                        except exception.NotFound:  # not has single
                            pass
                    if compare_instance:
                        compare_id = compare_instance.instance_id
                        switched = False
                        for b in last_backup_chain:  # has standby
                            if b["instance_id"] != compare_id:
                                switched = True  # create full
                                LOG.debug("last_backup_chain: %s, switched: %s, backup_instance_id: %s, b.instance_id: %s"
                                         , last_backup_chain, switched, compare_id, b["instance_id"])
                                break
                        if not switched:
                            parent = last_backup_chain.pop()  # create incremental
                            _parent_id = parent["id"]
                    else:  # not found standby and single
                        _parent_id = None  # create full
        LOG.debug("create backup use parent_id: %s", _parent_id)

        def _create_resources():
            try:
                db_info = models.DBBackup.create(name=name,
                                                 description=description,
                                                 tenant_id=context.tenant,
                                                 state=models.BackupState.NEW,
                                                 instance_id=instance_id,
                                                 deleted=False,
                                                 group_id=group_id,
                                                 type=backup_type,
                                                 expire_at=expire_time,
                                                 service_image_id=service_image_id,
                                                 parent_id=_parent_id)
            except exception.InvalidModelError as ex:
                LOG.exception("Unable to create Backup record:")
                msg = "Unable to create Backup record, group_id %s, instance_id %s, parent_id %s " % (group_id, _instance_id, _parent_id)
                AlarmRpc(context).alarm(context.tenant, level=AlarmRpc.LEVEL_ERROR,
                                        _type=AlarmRpc.TYPE_TASKMANAGER, message=msg+str(ex))             
                raise exception.BackupCreationError(str(ex))

            api.API(context).create_backup(db_info.id, instance_id)
            return db_info

        return run_with_quotas(context.tenant,
                               {'backups': 1},
                               _create_resources)
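The parent_id resolution above encodes the incremental-chain policy: snapshots always force a full backup; an autobackup starts a new chain when the last chain is empty or has hit CONF.backup_incremental_chain_size; and if any backup in the chain came from a different instance (a master/standby switch), the chain is stale and a full is forced. A condensed sketch of just that decision; the function and field names are hypothetical, not the module's API:

    def resolve_parent_id(backup_type, last_chain, chain_limit, compare_id):
        """Return the parent id for an incremental, or None to force a full.

        last_chain: list of {'id': ..., 'instance_id': ...} for the chain.
        compare_id: instance the chain's backups are expected to come from.
        """
        if backup_type == 'snapshot':
            return None  # snapshots are always full
        if not last_chain or len(last_chain) >= chain_limit:
            return None  # start a fresh chain with a full backup
        if any(b['instance_id'] != compare_id for b in last_chain):
            return None  # instances switched mid-chain; chain is stale
        return last_chain[-1]['id']  # continue the chain incrementally

    # A two-entry chain under a limit of 5 continues incrementally:
    chain = [{'id': 'b1', 'instance_id': 'standby-1'},
             {'id': 'b2', 'instance_id': 'standby-1'}]
    assert resolve_parent_id('autobackup', chain, 5, 'standby-1') == 'b2'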
Example #37
0
        def _create_cluster():

            # fetch instances by cluster_id against instances table
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]
            LOG.debug("instances in cluster %s: %s" % (cluster_id,
                                                       instance_ids))

            if not self._all_instances_ready(instance_ids, cluster_id):
                return

            LOG.debug("all instances in cluster %s ready." % cluster_id)

            instances = [Instance.load(context, instance_id) for instance_id
                         in instance_ids]

            # filter query routers in instances into a new list: query_routers
            query_routers = [instance for instance in instances if
                             instance.type == 'query_router']
            LOG.debug("query routers: %s" %
                      [instance.id for instance in query_routers])
            # filter config servers in instances into new list: config_servers
            config_servers = [instance for instance in instances if
                              instance.type == 'config_server']
            LOG.debug("config servers: %s" %
                      [instance.id for instance in config_servers])
            # filter members (non router/configsvr) into a new list: members
            members = [instance for instance in instances if
                       instance.type == 'member']
            LOG.debug("members: %s" %
                      [instance.id for instance in members])

            # for config_server in config_servers, append ip/hostname to
            # "config_server_hosts", then
            # peel off the replica-set name and ip/hostname from 'x'
            config_server_ips = [self.get_ip(instance)
                                 for instance in config_servers]
            LOG.debug("config server ips: %s" % config_server_ips)

            # Give the query routers the configsvr ips to connect to.
            # Create the admin user on the query routers.
            # The first will create the user, and the others will just reset
            # the password to the same value.
            LOG.debug("calling add_config_servers on, and sending admin user "
                      "password to, query_routers")
            try:
                admin_created = False
                admin_password = utils.generate_random_password()
                for query_router in query_routers:
                    guest = self.get_guest(query_router)
                    guest.add_config_servers(config_server_ips)
                    if admin_created:
                        guest.store_admin_password(admin_password)
                    else:
                        guest.create_admin_user(admin_password)
                        admin_created = True
            except Exception:
                LOG.exception(_("error adding config servers"))
                self.update_statuses_on_failure(cluster_id)
                return

            if not self._create_replica_set(members, cluster_id):
                return

            replica_set_name = "rs1"
            if not self._create_shard(query_routers, replica_set_name,
                                      members, cluster_id):
                return
            # call to start checking status
            for instance in instances:
                self.get_guest(instance).cluster_complete()
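The admin-user loop above is a create-once/propagate-everywhere pattern: the first query router creates the admin user, and every other router merely stores the same password. A minimal sketch against a stub guest interface (the Guest class and the secrets-based password are illustrative stand-ins):

    import secrets

    class Guest(object):  # stand-in for the query-router guest API
        def create_admin_user(self, password):
            print("creating admin user")

        def store_admin_password(self, password):
            print("storing the shared admin password")

    def setup_admin(guests):
        password = secrets.token_urlsafe(16)  # ~ generate_random_password()
        admin_created = False
        for guest in guests:
            if admin_created:
                guest.store_admin_password(password)
            else:
                guest.create_admin_user(password)  # only the first creates
                admin_created = True
        return password

    setup_admin([Guest(), Guest(), Guest()])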
Example #38
0
        def _grow_cluster():
            # Wait for new nodes to get to cluster-ready status.
            LOG.debug("Waiting for new nodes to become ready.")
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                return

            new_instances = [Instance.load(context, instance_id) for instance_id in new_instance_ids]
            added_nodes = [self.build_node_info(instance) for instance in new_instances]

            LOG.debug("All nodes ready, proceeding with cluster setup.")

            cluster_node_ids = self.find_cluster_node_ids(cluster_id)
            cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

            # Recompute the seed nodes based on the updated cluster geometry.
            seeds = self.choose_seed_nodes(cluster_nodes)

            # Configure each cluster node with the updated list of seeds.
            # Since we are adding to an existing cluster, ensure that the
            # new nodes have auto-bootstrapping enabled.
            # Start the added nodes.
            try:
                LOG.debug("Selected seed nodes: %s" % seeds)

                # Update the seeds on all nodes.
                # Also retrieve the superuser password from one previously
                # existing node.
                admin_creds = None
                for node in cluster_nodes:
                    LOG.debug("Configuring node: %s." % node["id"])
                    node["guest"].set_seeds(seeds)
                    if (admin_creds is None) and (node not in added_nodes):
                        admin_creds = node["guest"].get_admin_credentials()

                # Start any seeds from the added nodes first.
                LOG.debug("Starting new seed nodes.")
                for node in added_nodes:
                    if node["ip"] in seeds:
                        node["guest"].set_auto_bootstrap(True)
                        node["guest"].store_admin_credentials(admin_creds)
                        node["guest"].restart()
                        node["guest"].cluster_complete()

                LOG.debug("All new seeds running, starting the remaining of " "added nodes.")
                for node in added_nodes:
                    if node["ip"] not in seeds:
                        node["guest"].set_auto_bootstrap(True)
                        node["guest"].store_admin_credentials(admin_creds)
                        node["guest"].restart()
                        node["guest"].cluster_complete()

                # Run nodetool cleanup on each of the previously existing nodes
                # to remove the keys that no longer belong to those nodes.
                # Wait for cleanup to complete on one node before running
                # it on the next node.
                LOG.debug("Cleaning up orphan data on old cluster nodes.")
                for node in cluster_nodes:
                    if node not in added_nodes:
                        nid = node["id"]
                        node["guest"].node_cleanup_begin()
                        node["guest"].node_cleanup()
                        LOG.debug("Waiting for node to finish its " "cleanup: %s" % nid)
                        if not self._all_instances_running([nid], cluster_id):
                            LOG.warning(_("Node did not complete cleanup " "successfully: %s") % nid)

                LOG.debug("Cluster configuration finished successfully.")
            except Exception:
                LOG.exception(_("Error growing cluster."))
                self.update_statuses_on_failure(cluster_id)
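Two ordering rules make the grow above safe: new nodes chosen as seeds start before the remaining additions, and nodetool cleanup runs on the pre-existing nodes strictly one at a time so only one node pays the compaction cost at once. A schematic sketch of that ordering, using the same simplified node dicts as the code above (the wait callback is a placeholder):

    def start_added_nodes(added_nodes, seeds, admin_creds):
        # Phase 1: added nodes that are themselves seeds come up first.
        # Phase 2: the rest join against the now-running seeds.
        for is_seed in (True, False):
            for node in added_nodes:
                if (node['ip'] in seeds) == is_seed:
                    node['guest'].set_auto_bootstrap(True)
                    node['guest'].store_admin_credentials(admin_creds)
                    node['guest'].restart()
                    node['guest'].cluster_complete()

    def cleanup_old_nodes(old_nodes, wait_until_running):
        # Serialized: wait for each node before starting the next cleanup.
        for node in old_nodes:
            node['guest'].node_cleanup_begin()
            node['guest'].node_cleanup()
            wait_until_running([node['id']])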
Example #39
0
        def _create_cluster():

            # fetch instances by cluster_id against instances table
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]
            LOG.debug("instances in cluster %s: %s" % (cluster_id,
                                                       instance_ids))

            if not self._all_instances_ready(instance_ids, cluster_id):
                return

            LOG.debug("all instances in cluster %s ready." % cluster_id)

            instances = [Instance.load(context, instance_id) for instance_id
                         in instance_ids]

            # filter query routers in instances into a new list: query_routers
            query_routers = [instance for instance in instances if
                             instance.type == 'query_router']
            LOG.debug("query routers: %s" %
                      [instance.id for instance in query_routers])
            # filter config servers in instances into new list: config_servers
            config_servers = [instance for instance in instances if
                              instance.type == 'config_server']
            LOG.debug("config servers: %s" %
                      [instance.id for instance in config_servers])
            # filter members (non router/configsvr) into a new list: members
            members = [instance for instance in instances if
                       instance.type == 'member']
            LOG.debug("members: %s" %
                      [instance.id for instance in members])

            # for config_server in config_servers, append ip/hostname to
            # "config_server_hosts", then
            # peel off the replica-set name and ip/hostname from 'x'
            config_server_ips = [self.get_ip(instance)
                                 for instance in config_servers]
            LOG.debug("config server ips: %s" % config_server_ips)

            # Give the query routers the configsvr ips to connect to.
            # Create the admin user on the query routers.
            # The first will create the user, and the others will just reset
            # the password to the same value.
            LOG.debug("calling add_config_servers on, and sending admin user "
                      "password to, query_routers")
            try:
                admin_created = False
                admin_password = utils.generate_random_password()
                for query_router in query_routers:
                    guest = self.get_guest(query_router)
                    guest.add_config_servers(config_server_ips)
                    if admin_created:
                        guest.store_admin_password(admin_password)
                    else:
                        guest.create_admin_user(admin_password)
                        admin_created = True
            except Exception:
                LOG.exception(_("error adding config servers"))
                self.update_statuses_on_failure(cluster_id)
                return

            if not self._create_replica_set(members, cluster_id):
                return

            replica_set_name = "rs1"
            if not self._create_shard(query_routers, replica_set_name,
                                      members, cluster_id):
                return
            # call to start checking status
            for instance in instances:
                self.get_guest(instance).cluster_complete()
Example #40
0
        def _create_resources():
            # parse the ID from the Ref
            instance_id = utils.get_id_from_href(instance)

            # verify that the instance exists and can perform actions
            from trove.instance.models import Instance
            instance_model = Instance.load(context, instance_id)
            instance_model.validate_can_perform_action()
            cls.validate_can_perform_action(
                instance_model, 'backup_create')
            cls.verify_swift_auth_token(context)

            ds = instance_model.datastore
            ds_version = instance_model.datastore_version

            manager = (datastore_models.DatastoreVersion.load_by_uuid(
                ds_version.id).manager)
            if (not CONF.get(manager).enable_cluster_instance_backup
                    and instance_model.cluster_id is not None):
                raise exception.ClusterInstanceOperationNotSupported()

            parent = None
            parent_backup = None
            parent_backup_id = None
            last_backup_id = None
            if parent_id:
                # Look up the parent info or fail early if not found or if
                # the user does not have access to the parent.
                if parent_id == 'last':
                    # Need to assign parent_id to a new variable
                    # parent_backup_id, otherwise an unbound variable error
                    # will be thrown
                    parent_backup = Backup.get_last_completed(context,
                                                              instance_id,
                                                              True)
                    if parent_backup is None:
                        raise exception.NotFound()
                    parent_backup_id = parent_backup.id
                else:
                    parent_backup_id = parent_id
                _parent = cls.get_by_id(context, parent_backup_id)
                parent = {
                    'id': parent_backup_id,
                    'location': _parent.location,
                    'checksum': _parent.checksum,
                }
            elif incremental:
                _parent = Backup.get_last_completed(context, instance_id)
                if _parent:
                    parent = {
                        'location': _parent.location,
                        'checksum': _parent.checksum
                    }
                    last_backup_id = _parent.id
            try:
                db_info = DBBackup.create(name=name,
                                          description=description,
                                          tenant_id=context.tenant,
                                          state=BackupState.NEW,
                                          instance_id=instance_id,
                                          parent_id=parent_backup_id or
                                          last_backup_id,
                                          datastore_version_id=ds_version.id,
                                          deleted=False)
            except exception.InvalidModelError as ex:
                LOG.exception(_("Unable to create backup record for "
                                "instance: %s"), instance_id)
                raise exception.BackupCreationError(str(ex))

            backup_info = {'id': db_info.id,
                           'name': name,
                           'description': description,
                           'instance_id': instance_id,
                           'backup_type': db_info.backup_type,
                           'checksum': db_info.checksum,
                           'parent': parent,
                           'datastore': ds.name,
                           'datastore_version': ds_version.name,
                           }
            api.API(context).create_backup(backup_info, instance_id)
            return db_info
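The parent lookup above supports three modes: an explicit parent_id, the sentinel 'last' (resolve to the most recent completed backup or fail), and a bare incremental flag (best effort: fall back to a full backup if nothing completed exists). A condensed sketch of that resolution; the callables are placeholders for Backup.get_last_completed and cls.get_by_id:

    def resolve_parent(parent_id, incremental, get_last_completed, get_by_id):
        """Return (parent_dict_or_None, parent_backup_id_or_None)."""
        if parent_id == 'last':
            last = get_last_completed()
            if last is None:
                raise LookupError("no completed backup to chain from")
            parent_id = last.id
        if parent_id:
            _parent = get_by_id(parent_id)  # also enforces access control
            return ({'id': parent_id, 'location': _parent.location,
                     'checksum': _parent.checksum}, parent_id)
        if incremental:
            _parent = get_last_completed()
            if _parent:  # otherwise silently fall back to a full backup
                return ({'location': _parent.location,
                         'checksum': _parent.checksum}, _parent.id)
        return (None, None)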
Example #41
0
 def get_cluster_admin_password(self, context):
     """The cluster admin's user credentials are stored on all query
     routers. Find one and get the guest to return the password.
     """
     instance = Instance.load(context, self._get_running_query_router_id())
     return self.get_guest(instance).get_admin_password()
Example #42
0
        def _grow_cluster():

            db_instances = DBInstance.find_all(
                cluster_id=cluster_id, deleted=False).all()
            existing_instances = [Instance.load(context, db_inst.id)
                                  for db_inst in db_instances
                                  if db_inst.id not in new_instance_ids]
            if not existing_instances:
                raise TroveError(_("Unable to determine existing cluster "
                                   "member(s)"))

            # get list of ips of existing cluster members
            existing_cluster_ips = [self.get_ip(instance) for instance in
                                    existing_instances]
            existing_instance_guests = [self.get_guest(instance)
                                        for instance in existing_instances]

            # get the cluster context to setup new members
            cluster_context = existing_instance_guests[0].get_cluster_context()

            # Wait for cluster members to get to cluster-ready status.
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                raise TroveError(_("Instances in cluster did not report "
                                   "ACTIVE"))

            LOG.debug("All members ready, proceeding for cluster setup.")

            # Get the new instances to join the cluster
            new_instances = [Instance.load(context, instance_id)
                             for instance_id in new_instance_ids]
            new_cluster_ips = [self.get_ip(instance) for instance in
                               new_instances]
            for instance in new_instances:
                guest = self.get_guest(instance)

                guest.reset_admin_password(cluster_context['admin_password'])

                # render the conf.d/cluster.cnf configuration
                cluster_configuration = self._render_cluster_config(
                    context,
                    instance,
                    ",".join(existing_cluster_ips),
                    cluster_context['cluster_name'],
                    cluster_context['replication_user'])

                # push the cluster config and bootstrap the first instance
                bootstrap = False
                guest.install_cluster(cluster_context['replication_user'],
                                      cluster_configuration,
                                      bootstrap)

            self._check_cluster_for_root(context,
                                         existing_instances,
                                         new_instances)

            # apply the new config to all instances
            for instance in existing_instances + new_instances:
                guest = self.get_guest(instance)
                # render the conf.d/cluster.cnf configuration
                cluster_configuration = self._render_cluster_config(
                    context,
                    instance,
                    ",".join(existing_cluster_ips + new_cluster_ips),
                    cluster_context['cluster_name'],
                    cluster_context['replication_user'])
                guest.write_cluster_configuration_overrides(
                    cluster_configuration)

            for instance in new_instances:
                guest = self.get_guest(instance)
                guest.cluster_complete()
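Note the two-pass shape of the grow above: joiners are first installed against only the existing members' IPs with bootstrap=False (never bootstrap a node into a running cluster), and only then does every instance, old and new, get a cluster.cnf override listing the full membership. A compact sketch of that shape; the helper callables are placeholders, not Trove APIs:

    def grow(existing, new, render_config, install, write_overrides):
        old_ips = [node['ip'] for node in existing]
        new_ips = [node['ip'] for node in new]

        # Pass 1: joiners sync state from the current members only.
        for node in new:
            install(node, render_config(old_ips), bootstrap=False)

        # Pass 2: everyone receives the final, full membership list.
        for node in existing + new:
            write_overrides(node, render_config(old_ips + new_ips))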
Example #43
0
        def _create_cluster():
            # Fetch instances by cluster_id against instances table.
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]

            LOG.debug("Waiting for instances to get to cluster-ready status.")
            # Wait for cluster members to get to cluster-ready status.
            if not self._all_instances_ready(instance_ids, cluster_id):
                raise TroveError(
                    _("Instances in cluster did not report "
                      "ACTIVE"))

            LOG.debug("All members ready, proceeding for cluster setup.")
            instances = [
                Instance.load(context, instance_id)
                for instance_id in instance_ids
            ]

            cluster_ips = [self.get_ip(instance) for instance in instances]
            instance_guests = [
                self.get_guest(instance) for instance in instances
            ]

            # Create replication user and password for synchronizing the
            # galera cluster
            replication_user = {
                "name": self.CLUSTER_REPLICATION_USER,
                "password": utils.generate_random_password(),
            }

            # Galera cluster name must be unique and be shorter than a full
            # uuid string so we remove the hyphens and chop it off. It was
            # recommended to be 16 chars or less.
            # (this is not currently documented on Galera docs)
            cluster_name = utils.generate_uuid().replace("-", "")[:16]

            LOG.debug("Configuring cluster configuration.")
            try:
                # Set the admin password for all the instances because the
                # password in the my.cnf will be wrong after the joiner
                # instances syncs with the donor instance.
                admin_password = str(utils.generate_random_password())
                for guest in instance_guests:
                    guest.reset_admin_password(admin_password)

                bootstrap = True
                for instance in instances:
                    guest = self.get_guest(instance)

                    # render the conf.d/cluster.cnf configuration
                    cluster_configuration = self._render_cluster_config(
                        context, instance, ",".join(cluster_ips), cluster_name,
                        replication_user)

                    # push the cluster config and bootstrap the first instance
                    guest.install_cluster(replication_user,
                                          cluster_configuration, bootstrap)
                    bootstrap = False

                LOG.debug("Finalizing cluster configuration.")
                for guest in instance_guests:
                    guest.cluster_complete()
            except Exception:
                LOG.exception(_("Error creating cluster."))
                self.update_statuses_on_failure(cluster_id)
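The cluster-name trick above turns a UUID into a 16-character hex identifier, short enough for the informal Galera length recommendation while keeping 64 bits of randomness. For illustration:

    import uuid

    # uuid4().hex is the 32-char hyphen-free form; keeping the first 16
    # chars matches generate_uuid().replace("-", "")[:16] above.
    cluster_name = uuid.uuid4().hex[:16]
    print(cluster_name)  # e.g. '3f9a1c0d2b7e4a58'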
Example #44
0
        def _grow_cluster():

            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()
            existing_instances = [
                Instance.load(context, db_inst.id) for db_inst in db_instances
                if db_inst.id not in new_instance_ids
            ]
            if not existing_instances:
                raise TroveError(
                    _("Unable to determine existing cluster "
                      "member(s)"))

            # get list of ips of existing cluster members
            existing_cluster_ips = [
                self.get_ip(instance) for instance in existing_instances
            ]
            existing_instance_guests = [
                self.get_guest(instance) for instance in existing_instances
            ]

            # get the cluster context to setup new members
            cluster_context = existing_instance_guests[0].get_cluster_context()

            # Wait for cluster members to get to cluster-ready status.
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                raise TroveError(
                    _("Instances in cluster did not report "
                      "ACTIVE"))

            LOG.debug("All members ready, proceeding for cluster setup.")

            # Get the new instances to join the cluster
            new_instances = [
                Instance.load(context, instance_id)
                for instance_id in new_instance_ids
            ]
            new_cluster_ips = [
                self.get_ip(instance) for instance in new_instances
            ]
            for instance in new_instances:
                guest = self.get_guest(instance)

                guest.reset_admin_password(cluster_context['admin_password'])

                # render the conf.d/cluster.cnf configuration
                cluster_configuration = self._render_cluster_config(
                    context, instance, ",".join(existing_cluster_ips),
                    cluster_context['cluster_name'],
                    cluster_context['replication_user'])

                # push the cluster config and bootstrap the first instance
                bootstrap = False
                guest.install_cluster(cluster_context['replication_user'],
                                      cluster_configuration, bootstrap)

            self._check_cluster_for_root(context, existing_instances,
                                         new_instances)

            # apply the new config to all instances
            for instance in existing_instances + new_instances:
                guest = self.get_guest(instance)
                # render the conf.d/cluster.cnf configuration
                cluster_configuration = self._render_cluster_config(
                    context, instance,
                    ",".join(existing_cluster_ips + new_cluster_ips),
                    cluster_context['cluster_name'],
                    cluster_context['replication_user'])
                guest.write_cluster_configuration_overrides(
                    cluster_configuration)

            for instance in new_instances:
                guest = self.get_guest(instance)
                guest.cluster_complete()
Example #45
0
 def load_cluster_nodes(cls, context, node_ids):
     return [cls.build_node_info(Instance.load(context, node_id))
             for node_id in node_ids]
Example #46
0
        def _grow_cluster():
            # Wait for new nodes to get to cluster-ready status.
            LOG.debug("Waiting for new nodes to become ready.")
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                return

            new_instances = [Instance.load(context, instance_id)
                             for instance_id in new_instance_ids]
            added_nodes = [self.build_node_info(instance)
                           for instance in new_instances]

            LOG.debug("All nodes ready, proceeding with cluster setup.")

            cluster_node_ids = self.find_cluster_node_ids(cluster_id)
            cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

            # Recompute the seed nodes based on the updated cluster geometry.
            seeds = self.choose_seed_nodes(cluster_nodes)

            # Configure each cluster node with the updated list of seeds.
            # Since we are adding to an existing cluster, ensure that the
            # new nodes have auto-bootstrapping enabled.
            # Start the added nodes.
            try:
                LOG.debug("Selected seed nodes: %s" % seeds)

                # Update the seeds on all nodes.
                # Also retrieve the superuser password from one previously
                # existing node.
                admin_creds = None
                for node in cluster_nodes:
                    LOG.debug("Configuring node: %s." % node['id'])
                    node['guest'].set_seeds(seeds)
                    if (admin_creds is None) and (node not in added_nodes):
                        admin_creds = node['guest'].get_admin_credentials()

                # Start any seeds from the added nodes first.
                LOG.debug("Starting new seed nodes.")
                for node in added_nodes:
                    if node['ip'] in seeds:
                        node['guest'].set_auto_bootstrap(True)
                        node['guest'].store_admin_credentials(admin_creds)
                        node['guest'].restart()
                        node['guest'].cluster_complete()

                LOG.debug("All new seeds running, starting the remaining of "
                          "added nodes.")
                for node in added_nodes:
                    if node['ip'] not in seeds:
                        node['guest'].set_auto_bootstrap(True)
                        node['guest'].store_admin_credentials(admin_creds)
                        node['guest'].restart()
                        node['guest'].cluster_complete()

                # Run nodetool cleanup on each of the previously existing nodes
                # to remove the keys that no longer belong to those nodes.
                # Wait for cleanup to complete on one node before running
                # it on the next node.
                LOG.debug("Cleaning up orphan data on old cluster nodes.")
                for node in cluster_nodes:
                    if node not in added_nodes:
                        nid = node['id']
                        node['guest'].node_cleanup_begin()
                        node['guest'].node_cleanup()
                        LOG.debug("Waiting for node to finish its "
                                  "cleanup: %s" % nid)
                        if not self._all_instances_running([nid], cluster_id):
                            LOG.warning(_("Node did not complete cleanup "
                                          "successfully: %s") % nid)

                LOG.debug("Cluster configuration finished successfully.")
            except Exception:
                LOG.exception(_("Error growing cluster."))
                self.update_statuses_on_failure(cluster_id)
Example #47
0
    def shrink_cluster(self, context, cluster_id, removal_ids):
        """Shrink a K2hdkc Cluster."""
        LOG.debug("Begins shrink_cluster for %s. removal_ids: %s",
                  cluster_id, removal_ids)

        # 1. validates args
        if context is None:
            LOG.error("no context")
            return
        if cluster_id is None:
            LOG.error("no cluster_id")
            return
        if removal_ids is None:
            LOG.error("no removal_ids")
            return

        timeout = Timeout(CONF.cluster_usage_timeout)
        try:
            # 2. Retrieves db_instances from the database
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()
            # 3. Retrieves instance ids from the db_instances
            instance_ids = [db_instance.id for db_instance in db_instances]
            # 4. Checks if instances are running
            if not self._all_instances_running(instance_ids, cluster_id):
                LOG.error("instances are not ready yet")
                return
            # 5. Loads the instances to be removed
            instances = [
                Instance.load(context, instance_id)
                for instance_id in removal_ids
            ]
            LOG.debug("len(instances): %s", len(instances))

            # 6. Checks if the removing instances are shut down
            # (currently disabled):
            # if not self._all_instances_shutdown(removal_ids, cluster_id):
            #    LOG.error("removing instances are not shutdown yet")
            #    return
            # 7. Calls cluster_complete endpoint of K2hdkcGuestAgent
            LOG.debug(
                "Calling cluster_complete as a final hook to each node in the cluster"
            )
            for instance in instances:
                self.get_guest(instance).cluster_complete()
            # 8. delete node from OpenStack
            LOG.debug("delete node from OpenStack")
            for instance in instances:
                Instance.delete(instance)
            # 9. reset the current cluster task status to None
            LOG.debug("reset cluster task to None")
            self.reset_task()
        except Timeout as t:
            # Note: administrators should reset the task via the CLI in this
            # case.
            if t is not timeout:
                raise  # not my timeout
            LOG.exception("Timeout for shrink cluster.")
            self.update_statuses_on_failure(
                cluster_id, status=inst_tasks.InstanceTasks.SHRINKING_ERROR)
        finally:
            timeout.cancel()

        LOG.debug("Completed shrink_cluster for %s.", cluster_id)
Example #48
0
        def _grow_cluster():
            # Wait for new nodes to get to cluster-ready status.
            LOG.debug("Waiting for new nodes to become ready.")
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                return

            new_instances = [
                Instance.load(context, instance_id)
                for instance_id in new_instance_ids
            ]
            added_nodes = [
                self.build_node_info(instance) for instance in new_instances
            ]

            LOG.debug("All nodes ready, proceeding with cluster setup.")

            cluster_node_ids = self.find_cluster_node_ids(cluster_id)
            cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

            old_nodes = [
                node for node in cluster_nodes
                if node['id'] not in new_instance_ids
            ]

            try:

                # All nodes should have the same seeds and credentials.
                # Retrieve the information from the first node.
                test_node = old_nodes[0]
                current_seeds = test_node['guest'].get_seeds()
                admin_creds = test_node['guest'].get_admin_credentials()

                # Bootstrap new nodes.
                # Seed nodes do not bootstrap. Current running nodes
                # must be used as seeds during the process.
                # Since we are adding to an existing cluster, ensure that the
                # new nodes have auto-bootstrapping enabled.
                # Start the added nodes.
                LOG.debug("Starting new nodes.")
                for node in added_nodes:
                    node['guest'].set_auto_bootstrap(True)
                    node['guest'].set_seeds(current_seeds)
                    node['guest'].store_admin_credentials(admin_creds)
                    node['guest'].restart()
                    node['guest'].cluster_complete()

                # Recompute the seed nodes based on the updated cluster
                # geometry.
                seeds = self.choose_seed_nodes(cluster_nodes)

                # Configure each cluster node with the updated list of seeds.
                LOG.debug("Updating all nodes with new seeds: %s", seeds)
                for node in cluster_nodes:
                    node['guest'].set_seeds(seeds)

                # Run nodetool cleanup on each of the previously existing nodes
                # to remove the keys that no longer belong to those nodes.
                # Wait for cleanup to complete on one node before running
                # it on the next node.
                LOG.debug("Cleaning up orphan data on old cluster nodes.")
                for node in old_nodes:
                    nid = node['id']
                    node['guest'].node_cleanup_begin()
                    node['guest'].node_cleanup()
                    LOG.debug("Waiting for node to finish its "
                              "cleanup: %s", nid)
                    if not self._all_instances_running([nid], cluster_id):
                        LOG.warning(
                            _("Node did not complete cleanup "
                              "successfully: %s"), nid)

                LOG.debug("Cluster configuration finished successfully.")
            except Exception:
                LOG.exception(_("Error growing cluster."))
                self.update_statuses_on_failure(cluster_id)
Example #49
0
        def _grow_cluster():
            # Wait for new nodes to get to cluster-ready status.
            LOG.debug("Waiting for new nodes to become ready.")
            if not self._all_instances_ready(new_instance_ids, cluster_id):
                return

            new_instances = [Instance.load(context, instance_id)
                             for instance_id in new_instance_ids]
            added_nodes = [self.build_node_info(instance)
                           for instance in new_instances]

            LOG.debug("All nodes ready, proceeding with cluster setup.")

            cluster_node_ids = self.find_cluster_node_ids(cluster_id)
            cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

            old_nodes = [node for node in cluster_nodes
                         if node['id'] not in new_instance_ids]

            try:

                # All nodes should have the same seeds and credentials.
                # Retrieve the information from the first node.
                test_node = old_nodes[0]
                current_seeds = test_node['guest'].get_seeds()
                admin_creds = test_node['guest'].get_admin_credentials()

                # Bootstrap new nodes.
                # Seed nodes do not bootstrap. Current running nodes
                # must be used as seeds during the process.
                # Since we are adding to an existing cluster, ensure that the
                # new nodes have auto-bootstrapping enabled.
                # Start the added nodes.
                LOG.debug("Starting new nodes.")
                for node in added_nodes:
                    node['guest'].set_auto_bootstrap(True)
                    node['guest'].set_seeds(current_seeds)
                    node['guest'].store_admin_credentials(admin_creds)
                    node['guest'].restart()
                    node['guest'].cluster_complete()

                # Recompute the seed nodes based on the updated cluster
                # geometry.
                seeds = self.choose_seed_nodes(cluster_nodes)

                # Configure each cluster node with the updated list of seeds.
                LOG.debug("Updating all nodes with new seeds: %s" % seeds)
                for node in cluster_nodes:
                    node['guest'].set_seeds(seeds)

                # Run nodetool cleanup on each of the previously existing nodes
                # to remove the keys that no longer belong to those nodes.
                # Wait for cleanup to complete on one node before running
                # it on the next node.
                LOG.debug("Cleaning up orphan data on old cluster nodes.")
                for node in old_nodes:
                    nid = node['id']
                    node['guest'].node_cleanup_begin()
                    node['guest'].node_cleanup()
                    LOG.debug("Waiting for node to finish its "
                              "cleanup: %s" % nid)
                    if not self._all_instances_running([nid], cluster_id):
                        LOG.warning(_("Node did not complete cleanup "
                                      "successfully: %s") % nid)

                LOG.debug("Cluster configuration finished successfully.")
            except Exception:
                LOG.exception(_("Error growing cluster."))
                self.update_statuses_on_failure(cluster_id)
Example #50
0
        def _create_cluster():
            # Fetch instances by cluster_id against instances table.
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            instance_ids = [db_instance.id for db_instance in db_instances]

            LOG.debug("Waiting for instances to get to cluster-ready status.")
            # Wait for cluster members to get to cluster-ready status.
            if not self._all_instances_ready(instance_ids, cluster_id):
                raise TroveError(_("Instances in cluster did not report "
                                   "ACTIVE"))

            LOG.debug("All members ready, proceeding for cluster setup.")
            instances = [Instance.load(context, instance_id) for instance_id
                         in instance_ids]

            cluster_ips = [self.get_ip(instance) for instance in instances]
            instance_guests = [self.get_guest(instance)
                               for instance in instances]

            # Create replication user and password for synchronizing the
            # galera cluster
            replication_user = {
                "name": self.CLUSTER_REPLICATION_USER,
                "password": utils.generate_random_password(),
            }

            # Galera cluster name must be unique and be shorter than a full
            # uuid string so we remove the hyphens and chop it off. It was
            # recommended to be 16 chars or less.
            # (this is not currently documented on Galera docs)
            cluster_name = utils.generate_uuid().replace("-", "")[:16]

            LOG.debug("Configuring cluster configuration.")
            try:
                # Set the admin password for all the instances because the
                # password in the my.cnf will be wrong after the joiner
                # instances syncs with the donor instance.
                admin_password = str(utils.generate_random_password())
                for guest in instance_guests:
                    guest.reset_admin_password(admin_password)

                bootstrap = True
                for instance in instances:
                    guest = self.get_guest(instance)

                    # render the conf.d/cluster.cnf configuration
                    cluster_configuration = self._render_cluster_config(
                        context,
                        instance,
                        ",".join(cluster_ips),
                        cluster_name,
                        replication_user)

                    # push the cluster config and bootstrap the first instance
                    guest.install_cluster(replication_user,
                                          cluster_configuration,
                                          bootstrap)
                    bootstrap = False

                LOG.debug("Finalizing cluster configuration.")
                for guest in instance_guests:
                    guest.cluster_complete()
            except Exception:
                LOG.exception(_("Error creating cluster."))
                self.update_statuses_on_failure(cluster_id)