Example #1
        def _shrink_cluster():
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()

            all_instance_ids = [db_instance.id for db_instance in db_instances]

            remove_instances = [Instance.load(context, instance_id)
                                for instance_id in instance_ids]

            left_instances = [Instance.load(context, instance_id)
                              for instance_id
                              in all_instance_ids
                              if instance_id not in instance_ids]

            remove_member_ips = [self.get_ip(instance)
                                 for instance in remove_instances]

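            # Recompute the k-safety value for the nodes that will remain.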
            k = VerticaCluster.k_safety(len(left_instances))

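            # Find the master node; refuse to remove it, then have it mark
            # the new k-safety and drop the removed members.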
            for db_instance in db_instances:
                if db_instance['type'] == 'master':
                    master_instance = Instance.load(context,
                                                    db_instance.id)
                    if self.get_ip(master_instance) in remove_member_ips:
                        raise RuntimeError(_("Cannot remove master instance!"))
                    LOG.debug(_("Marking cluster k-safety: %s") % k)
                    self.get_guest(master_instance).mark_design_ksafe(k)
                    self.get_guest(master_instance).shrink_cluster(
                        remove_member_ips)
                    break

            for r in remove_instances:
                Instance.delete(r)
Example #2
        def _shrink_cluster():
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()

            all_instance_ids = [db_instance.id for db_instance in db_instances]

            remove_instances = [Instance.load(context, instance_id)
                                for instance_id in instance_ids]

            left_instances = [Instance.load(context, instance_id)
                              for instance_id
                              in all_instance_ids
                              if instance_id not in instance_ids]

            remove_member_ips = [self.get_ip(instance)
                                 for instance in remove_instances]

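            # Recompute the k-safety value for the nodes that will remain.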
            k = VerticaCluster.k_safety(len(left_instances))

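            # Find the master node; refuse to remove it, then have it mark
            # the new k-safety and drop the removed members.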
            for db_instance in db_instances:
                if db_instance['type'] == 'master':
                    master_instance = Instance.load(context,
                                                    db_instance.id)
                    if self.get_ip(master_instance) in remove_member_ips:
                        raise RuntimeError(_("Cannot remove master instance!"))
                    LOG.debug("Marking cluster k-safety: %s", k)
                    self.get_guest(master_instance).mark_design_ksafe(k)
                    self.get_guest(master_instance).shrink_cluster(
                        remove_member_ips)
                    break

            for r in remove_instances:
                Instance.delete(r)
Example #3
        def _shrink_cluster():
            cluster_node_ids = self.find_cluster_node_ids(cluster_id)
            cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

            removed_nodes = CassandraClusterTasks.load_cluster_nodes(context, removal_ids)

            LOG.debug("All nodes ready, proceeding with cluster setup.")

            # Update the list of seeds on remaining nodes if necessary.
            # Once all nodes are configured, decommission the removed nodes.
            # Cassandra will stream data from decommissioned nodes to the
            # remaining ones.
            try:

                # All nodes should have the same seeds.
                # We retrieve current seeds from the first node.
                test_node = self.load_cluster_nodes(context, cluster_node_ids[:1])[0]
                current_seeds = test_node["guest"].get_seeds()
                # The seeds will have to be updated on all remaining instances
                # if any of the seed nodes is going to be removed.
                update_seeds = any(node["ip"] in current_seeds for node in removed_nodes)

                LOG.debug("Decommissioning removed nodes.")
                for node in removed_nodes:
                    node["guest"].node_decommission()
                    node["instance"].update_db(cluster_id=None)

                # Recompute the seed nodes based on the updated cluster
                # geometry if any of the existing seed nodes was removed.
                if update_seeds:
                    LOG.debug("Updating seeds on the remaining nodes.")
                    cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

                    remaining_nodes = [node for node in cluster_nodes if node["id"] not in removal_ids]
                    seeds = self.choose_seed_nodes(remaining_nodes)
                    LOG.debug("Selected seed nodes: %s" % seeds)
                    for node in remaining_nodes:
                        LOG.debug("Configuring node: %s." % node["id"])
                        node["guest"].set_seeds(seeds)

                # Wait for the removed nodes to go SHUTDOWN.
                LOG.debug("Waiting for all decommissioned nodes to shutdown.")
                if not self._all_instances_shutdown(removal_ids, cluster_id):
                    # Now detached, failed nodes will stay available
                    # in the list of standalone instances.
                    return

                # Delete decommissioned instances only when the cluster is in a
                # consistent state.
                LOG.debug("Deleting decommissioned instances.")
                for node in removed_nodes:
                    Instance.delete(node["instance"])

                LOG.debug("Cluster configuration finished successfully.")
            except Exception:
                LOG.exception(_("Error shrinking cluster."))
                self.update_statuses_on_failure(cluster_id)
Example #4
        def _shrink_cluster():
            removal_instances = [
                Instance.load(context, instance_id)
                for instance_id in removal_instance_ids
            ]
            for instance in removal_instances:
                Instance.delete(instance)

            # wait for instances to be deleted
            def all_instances_marked_deleted():
                non_deleted_instances = DBInstance.find_all(
                    cluster_id=cluster_id, deleted=False).all()
                non_deleted_ids = [
                    db_instance.id for db_instance in non_deleted_instances
                ]
                return not bool(
                    set(removal_instance_ids).intersection(
                        set(non_deleted_ids)))

            try:
                LOG.info("Deleting instances (%s)", removal_instance_ids)
                utils.poll_until(all_instances_marked_deleted,
                                 sleep_time=2,
                                 time_out=CONF.cluster_delete_time_out)
            except PollTimeOut:
                LOG.error("timeout for instances to be marked as deleted.")
                return

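            # Reload the surviving cluster members and collect their IPs for
            # the updated cluster configuration.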
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()
            leftover_instances = [
                Instance.load(context, db_inst.id) for db_inst in db_instances
                if db_inst.id not in removal_instance_ids
            ]
            leftover_cluster_ips = [
                self.get_ip(instance) for instance in leftover_instances
            ]

            # Get config changes for left over instances
            rnd_cluster_guest = self.get_guest(leftover_instances[0])
            cluster_context = rnd_cluster_guest.get_cluster_context()

            # apply the new config to all leftover instances
            for instance in leftover_instances:
                guest = self.get_guest(instance)
                # render the conf.d/cluster.cnf configuration
                cluster_configuration = self._render_cluster_config(
                    context, instance, ",".join(leftover_cluster_ips),
                    cluster_context['cluster_name'],
                    cluster_context['replication_user'])
                guest.write_cluster_configuration_overrides(
                    cluster_configuration)
Example #5
        def _shrink_cluster():
            removal_instances = [Instance.load(context, instance_id)
                                 for instance_id in removal_instance_ids]
            for instance in removal_instances:
                Instance.delete(instance)

            # wait for instances to be deleted
            def all_instances_marked_deleted():
                non_deleted_instances = DBInstance.find_all(
                    cluster_id=cluster_id, deleted=False).all()
                non_deleted_ids = [db_instance.id for db_instance
                                   in non_deleted_instances]
                return not bool(
                    set(removal_instance_ids).intersection(
                        set(non_deleted_ids))
                )
            try:
                LOG.info(_("Deleting instances (%s)") % removal_instance_ids)
                utils.poll_until(all_instances_marked_deleted,
                                 sleep_time=2,
                                 time_out=CONF.cluster_delete_time_out)
            except PollTimeOut:
                LOG.error(_("timeout for instances to be marked as deleted."))
                return

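            # Reload the surviving cluster members and collect their IPs for
            # the updated cluster configuration.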
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
            leftover_instances = [Instance.load(context, db_inst.id)
                                  for db_inst in db_instances
                                  if db_inst.id not in removal_instance_ids]
            leftover_cluster_ips = [self.get_ip(instance) for instance in
                                    leftover_instances]

            # Get config changes for left over instances
            rnd_cluster_guest = self.get_guest(leftover_instances[0])
            cluster_context = rnd_cluster_guest.get_cluster_context()

            # apply the new config to all leftover instances
            for instance in leftover_instances:
                guest = self.get_guest(instance)
                # render the conf.d/cluster.cnf configuration
                cluster_configuration = self._render_cluster_config(
                    context,
                    instance,
                    ",".join(leftover_cluster_ips),
                    cluster_context['cluster_name'],
                    cluster_context['replication_user'])
                guest.write_cluster_configuration_overrides(
                    cluster_configuration)
Example #6
    def _remove_nodes(self, coordinator, removed_nodes):
        LOG.debug("Decommissioning nodes and rebalancing the cluster.")
        guest_node_info = self.build_guest_node_info(removed_nodes)
        result = coordinator['guest'].remove_nodes(guest_node_info)
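        # remove_nodes() is expected to return a (success, message) pair.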
        if not result or len(result) < 2:
            raise exception.TroveError(
                _("No status returned from removing nodes from cluster."))

        if result[0]:
            for node in removed_nodes:
                instance = node['instance']
                LOG.debug("Deleting decommissioned instance %s." %
                          instance.id)
                instance.update_db(cluster_id=None)
                Instance.delete(instance)
        else:
            raise exception.TroveError(
                _("Could not remove nodes from cluster: %s") % result[1])
Example #7
    def _remove_nodes(self, coordinator, removed_nodes):
        LOG.debug("Decommissioning nodes and rebalacing the cluster.")
        coordinator['guest'].remove_nodes({node['ip']
                                           for node in removed_nodes})

        # Always remove decommissioned instances from the cluster,
        # irrespective of the result of rebalancing.
        for node in removed_nodes:
            node['instance'].update_db(cluster_id=None)

        LOG.debug("Waiting for the rebalancing process to finish.")
        self._wait_for_rebalance_to_finish(coordinator)

        # Delete decommissioned instances only when the cluster is in a
        # consistent state.
        LOG.debug("Deleting decommissioned instances.")
        for node in removed_nodes:
            Instance.delete(node['instance'])
Example #8
    def _remove_nodes(self, coordinator, removed_nodes):
        LOG.debug("Decommissioning nodes and rebalacing the cluster.")
        coordinator['guest'].remove_nodes(
            {node['ip']
             for node in removed_nodes})

        # Always remove decommissioned instances from the cluster,
        # irrespective of the result of rebalancing.
        for node in removed_nodes:
            node['instance'].update_db(cluster_id=None)

        LOG.debug("Waiting for the rebalancing process to finish.")
        self._wait_for_rebalance_to_finish(coordinator)

        # Delete decommissioned instances only when the cluster is in a
        # consistent state.
        LOG.debug("Deleting decommissioned instances.")
        for node in removed_nodes:
            Instance.delete(node['instance'])
Example #9
        def _shrink_cluster():
            cluster_node_ids = self.find_cluster_node_ids(cluster_id)
            cluster_nodes = self.load_cluster_nodes(context, cluster_node_ids)

            removed_nodes = CassandraClusterTasks.load_cluster_nodes(
                context, removal_ids)

            LOG.debug("All nodes ready, proceeding with cluster setup.")

            # Update the list of seeds on remaining nodes if necessary.
            # Once all nodes are configured, decommission the removed nodes.
            # Cassandra will stream data from decommissioned nodes to the
            # remaining ones.
            try:

                # All nodes should have the same seeds.
                # We retrieve current seeds from the first node.
                test_node = self.load_cluster_nodes(context,
                                                    cluster_node_ids[:1])[0]
                current_seeds = test_node['guest'].get_seeds()
                # The seeds will have to be updated on all remaining instances
                # if any of the seed nodes is going to be removed.
                update_seeds = any(node['ip'] in current_seeds
                                   for node in removed_nodes)

                LOG.debug("Decommissioning removed nodes.")
                for node in removed_nodes:
                    node['guest'].node_decommission()
                    node['instance'].update_db(cluster_id=None)

                # Recompute the seed nodes based on the updated cluster
                # geometry if any of the existing seed nodes was removed.
                if update_seeds:
                    LOG.debug("Updating seeds on the remaining nodes.")
                    cluster_nodes = self.load_cluster_nodes(
                        context, cluster_node_ids)

                    remaining_nodes = [
                        node for node in cluster_nodes
                        if node['id'] not in removal_ids
                    ]
                    seeds = self.choose_seed_nodes(remaining_nodes)
                    LOG.debug("Selected seed nodes: %s", seeds)
                    for node in remaining_nodes:
                        LOG.debug("Configuring node: %s.", node['id'])
                        node['guest'].set_seeds(seeds)

                # Wait for the removed nodes to go SHUTDOWN.
                LOG.debug("Waiting for all decommissioned nodes to shutdown.")
                if not self._all_instances_shutdown(removal_ids, cluster_id):
                    # Now detached, failed nodes will stay available
                    # in the list of standalone instances.
                    return

                # Delete decommissioned instances only when the cluster is in a
                # consistent state.
                LOG.debug("Deleting decommissioned instances.")
                for node in removed_nodes:
                    Instance.delete(node['instance'])

                LOG.debug("Cluster configuration finished successfully.")
            except Exception:
                LOG.exception(_("Error shrinking cluster."))
                self.update_statuses_on_failure(cluster_id)
Example #10
    def shrink_cluster(self, context, cluster_id, removal_ids):
        """Shrink a K2hdkc Cluster."""
        LOG.debug("Begins shrink_cluster for %s. removal_ids: %s",
                  cluster_id, removal_ids)

        # 1. validates args
        if context is None:
            LOG.error("no context")
            return
        if cluster_id is None:
            LOG.error("no cluster_id")
            return
        if removal_ids is None:
            LOG.error("no removal_ids")
            return

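        # Guard the whole shrink operation with an overall timeout; the
        # timeout is cancelled in the finally block below.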
        timeout = Timeout(CONF.cluster_usage_timeout)
        try:
            # 2. Retrieves db_instances from the database
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               deleted=False).all()
            # 3. Retrieves instance ids from the db_instances
            instance_ids = [db_instance.id for db_instance in db_instances]
            # 4. Checks if instances are running
            if not self._all_instances_running(instance_ids, cluster_id):
                LOG.error("instances are not ready yet")
                return
            # 5. Loads the instances to be removed
            instances = [
                Instance.load(context, instance_id)
                for instance_id in removal_ids
            ]
            LOG.debug("len(instances) {}".format(len(instances)))

            # 6. Instantiates the GuestAgent class for each instance
            # 6.2. Checks if the instances being removed are shut down
            # if not self._all_instances_shutdown(removal_ids, cluster_id):
            #    LOG.error("removing instances are not shutdown yet")
            #    return
            # 7. Calls cluster_complete endpoint of K2hdkcGuestAgent
            LOG.debug(
                "Calling cluster_complete as a final hook to each node in the cluster"
            )
            for instance in instances:
                self.get_guest(instance).cluster_complete()
            # 8. delete node from OpenStack
            LOG.debug("delete node from OpenStack")
            for instance in instances:
                Instance.delete(instance)
            # 9. reset the current cluster task status to None
            LOG.debug("reset cluster task to None")
            self.reset_task()
        except Timeout as t:
            # Note: administrators should reset the task via CLI in this case.
            if t is not timeout:
                raise  # not my timeout
            LOG.exception("Timeout for shrink cluster.")
            self.update_statuses_on_failure(
                cluster_id, status=inst_tasks.InstanceTasks.SHRINKING_ERROR)
        finally:
            timeout.cancel()

        LOG.debug("Completed shrink_cluster for %s.", cluster_id)