def test_show_deleted_mgmt_instances(self):
    args = {'deleted': 0, 'cluster_id': None}
    db_infos_active = DBInstance.find_all(**args)
    args = {'deleted': 1, 'cluster_id': None}
    db_infos_deleted = DBInstance.find_all(**args)
    args = {'cluster_id': None}
    # db_infos_all = DBInstance.find_all(**args)

    # TODO(SlickNik) Fix this assert to work reliably in the gate.
    # This fails intermittently when the unit tests run in parallel.
    # self.assertTrue(db_infos_all.count() ==
    #                 db_infos_active.count() +
    #                 db_infos_deleted.count())

    with patch.object(self.context, 'is_admin', return_value=True):
        deleted_instance = db_infos_deleted.all()[0]
        active_instance = db_infos_active.all()[0]

        instance = DBInstance.find_by(context=self.context,
                                      id=active_instance.id)
        self.assertEqual(active_instance.id, instance.id)

        self.assertRaises(
            exception.ModelNotFoundError, DBInstance.find_by,
            context=self.context, id=deleted_instance.id,
            deleted=False)

        instance = DBInstance.find_by(context=self.context,
                                      id=deleted_instance.id,
                                      deleted=True)
        self.assertEqual(deleted_instance.id, instance.id)

def _add_shard_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       shard_id=shard_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in shard %s: %s" % (shard_id, instance_ids))
    if not self._all_instances_ready(instance_ids, cluster_id, shard_id):
        return

    members = [Instance.load(context, instance_id)
               for instance_id in instance_ids]

    if not self._create_replica_set(members, cluster_id, shard_id):
        return

    db_query_routers = DBInstance.find_all(cluster_id=cluster_id,
                                           type='query_router',
                                           deleted=False).all()
    query_routers = [Instance.load(context, db_query_router.id)
                     for db_query_router in db_query_routers]

    if not self._create_shard(query_routers, replica_set_name,
                              members, cluster_id, shard_id):
        return

    for member in members:
        self.get_guest(member).cluster_complete()

def _add_shard_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       shard_id=shard_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in shard %s: %s" % (shard_id, instance_ids))
    if not self._all_instances_ready(instance_ids, cluster_id, shard_id):
        return

    members = [Instance.load(context, instance_id)
               for instance_id in instance_ids]

    db_query_routers = DBInstance.find_all(cluster_id=cluster_id,
                                           type='query_router',
                                           deleted=False).all()
    query_routers = [Instance.load(context, db_query_router.id)
                     for db_query_router in db_query_routers]

    if not self._create_shard(query_routers[0], members):
        return

    for member in members:
        self.get_guest(member).cluster_complete()

def _add_shard_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False,
                                       shard_id=shard_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in shard %(shard_id)s: %(instance_ids)s",
              {'shard_id': shard_id, 'instance_ids': instance_ids})
    if not self._all_instances_ready(instance_ids, cluster_id, shard_id):
        return

    members = [Instance.load(context, instance_id)
               for instance_id in instance_ids]

    db_query_routers = DBInstance.find_all(cluster_id=cluster_id,
                                           type='query_router',
                                           deleted=False).all()
    query_routers = [Instance.load(context, db_query_router.id)
                     for db_query_router in db_query_routers]

    if not self._create_shard(query_routers[0], members):
        return

    for member in members:
        self.get_guest(member).cluster_complete()

def update_statuses_on_failure(self, cluster_id, shard_id=None):
    if CONF.update_status_on_fail:
        if shard_id:
            db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                               shard_id=shard_id).all()
        else:
            db_instances = DBInstance.find_all(cluster_id=cluster_id).all()

        for db_instance in db_instances:
            db_instance.set_task_status(InstanceTasks.BUILDING_ERROR_SERVER)
            db_instance.save()

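# A hedged sketch of an alternative to the if/else branch above: build the
# filter kwargs once and include shard_id only when it is given, leaving a
# single find_all() call site. Illustrative only; the helper name
# _failure_filter is hypothetical and not part of this module.
def _failure_filter(cluster_id, shard_id=None):
    args = {'cluster_id': cluster_id}
    if shard_id:
        args['shard_id'] = shard_id
    return args

# usage sketch:
#   db_instances = DBInstance.find_all(**_failure_filter(cluster_id,
#                                                        shard_id)).all()
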
def _shrink_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    all_instance_ids = [db_instance.id for db_instance in db_instances]

    remove_instances = [Instance.load(context, instance_id)
                        for instance_id in instance_ids]

    left_instances = [Instance.load(context, instance_id)
                      for instance_id in all_instance_ids
                      if instance_id not in instance_ids]

    remove_member_ips = [self.get_ip(instance)
                         for instance in remove_instances]

    k = VerticaCluster.k_safety(len(left_instances))

    for db_instance in db_instances:
        if db_instance['type'] == 'master':
            master_instance = Instance.load(context, db_instance.id)
            if self.get_ip(master_instance) in remove_member_ips:
                raise RuntimeError(_("Cannot remove master instance!"))
            LOG.debug("Marking cluster k-safety: %s", k)
            self.get_guest(master_instance).mark_design_ksafe(k)
            self.get_guest(master_instance).shrink_cluster(
                remove_member_ips)
            break

    for r in remove_instances:
        Instance.delete(r)

def _grow_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    # Default to None so the check below can fire; a bare next() would
    # raise StopIteration when no existing member is found.
    cluster_head = next((Instance.load(context, db_inst.id)
                         for db_inst in db_instances
                         if db_inst.id not in new_instance_ids), None)
    if not cluster_head:
        raise TroveError(_("Unable to determine existing Redis cluster"
                           " member"))

    (cluster_head_ip, cluster_head_port) = (
        self.get_guest(cluster_head).get_node_ip())

    # Wait for cluster members to get to cluster-ready status.
    if not self._all_instances_ready(new_instance_ids, cluster_id):
        return

    LOG.debug("All members ready, proceeding for cluster setup.")

    new_insts = [Instance.load(context, instance_id)
                 for instance_id in new_instance_ids]
    # Materialize the guests up front; a lazy map() iterator would be
    # exhausted after the first loop under Python 3.
    new_guests = [self.get_guest(inst) for inst in new_insts]

    # Connect nodes to the cluster head
    for guest in new_guests:
        guest.cluster_meet(cluster_head_ip, cluster_head_port)

    for guest in new_guests:
        guest.cluster_complete()

def _grow_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    # Default to None so the check below can fire; a bare next() would
    # raise StopIteration when no existing member is found.
    cluster_head = next((Instance.load(context, db_inst.id)
                         for db_inst in db_instances
                         if db_inst.id not in new_instance_ids), None)
    if not cluster_head:
        raise TroveError("Unable to determine existing Redis cluster "
                         "member")

    (cluster_head_ip, cluster_head_port) = (
        self.get_guest(cluster_head).get_node_ip())

    # Wait for cluster members to get to cluster-ready status.
    if not self._all_instances_ready(new_instance_ids, cluster_id):
        return

    LOG.debug("All members ready, proceeding for cluster setup.")

    new_insts = [Instance.load(context, instance_id)
                 for instance_id in new_instance_ids]
    # Materialize the guests up front; a lazy map() iterator would be
    # exhausted after the first loop under Python 3.
    new_guests = [self.get_guest(inst) for inst in new_insts]

    # Connect nodes to the cluster head
    for guest in new_guests:
        guest.cluster_meet(cluster_head_ip, cluster_head_port)

    for guest in new_guests:
        guest.cluster_complete()

def shrink(self, instances):
    """Removes instances from a cluster."""
    LOG.debug("Shrinking cluster %s.", self.id)

    self.validate_cluster_available()
    removal_instances = [Instance.load(self.context, inst_id)
                         for inst_id in instances]
    db_instances = DBInstance.find_all(cluster_id=self.db_info.id,
                                       deleted=False).all()
    if len(db_instances) - len(removal_instances) < 1:
        raise exception.ClusterShrinkMustNotLeaveClusterEmpty()

    self.db_info.update(task_status=ClusterTasks.SHRINKING_CLUSTER)
    try:
        task_api.load(self.context, self.ds_version.manager).shrink_cluster(
            self.db_info.id,
            [instance.id for instance in removal_instances])
    except Exception:
        self.db_info.update(task_status=ClusterTasks.NONE)
        raise

    return self.__class__(self.context, self.db_info,
                          self.ds, self.ds_version)

def _shrink_cluster():
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    all_instance_ids = [db_instance.id for db_instance in db_instances]

    remove_instances = [Instance.load(context, instance_id)
                        for instance_id in instance_ids]

    left_instances = [Instance.load(context, instance_id)
                      for instance_id in all_instance_ids
                      if instance_id not in instance_ids]

    remove_member_ips = [self.get_ip(instance)
                         for instance in remove_instances]

    k = VerticaCluster.k_safety(len(left_instances))

    for db_instance in db_instances:
        if db_instance['type'] == 'master':
            master_instance = Instance.load(context, db_instance.id)
            if self.get_ip(master_instance) in remove_member_ips:
                raise RuntimeError(_("Cannot remove master instance!"))
            LOG.debug(_("Marking cluster k-safety: %s") % k)
            self.get_guest(master_instance).mark_design_ksafe(k)
            self.get_guest(master_instance).shrink_cluster(
                remove_member_ips)
            break

    for r in remove_instances:
        Instance.delete(r)

def _create_cluster():
    # fetch instances by cluster_id against instances table
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in cluster %(cluster_id)s: %(instance_ids)s",
              {'cluster_id': cluster_id, 'instance_ids': instance_ids})

    if not self._all_instances_ready(instance_ids, cluster_id):
        return

    LOG.debug("all instances in cluster %s ready.", cluster_id)

    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    # filter tidb_server instances into a new list: tidb_server
    tidb_server = [instance for instance in instances
                   if instance.type == 'tidb_server']
    # NOTE: the original logged the undefined name 'query_routers' here,
    # a stale copy-paste from the MongoDB strategy; log tidb_server.
    LOG.debug("tidb_server: %s", [instance.id for instance in tidb_server])

    # filter pd_server instances into a new list: pd_server
    pd_server = [instance for instance in instances
                 if instance.type == 'pd_server']
    LOG.debug("pd_server: %s", [instance.id for instance in pd_server])

    # filter tikv instances into a new list: tikv
    tikv = [instance for instance in instances
            if instance.type == 'tikv']
    LOG.debug("tikv: %s", [instance.id for instance in tikv])

def load(context, id):
    client = create_nova_client(context)
    account = client.accounts.get_instances(id)
    db_infos = DBInstance.find_all(tenant_id=id, deleted=False)
    servers = [Server(server) for server in account.servers]
    instances = MgmtInstances.load_status_from_existing(context, db_infos,
                                                        servers)
    return Account(id, instances)

def all_instances_marked_deleted():
    non_deleted_instances = DBInstance.find_all(
        cluster_id=cluster_id, deleted=False).all()
    non_deleted_ids = [db_instance.id
                       for db_instance in non_deleted_instances]
    return not bool(set(instance_ids).intersection(set(non_deleted_ids)))

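# For context, a minimal sketch of how a predicate like
# all_instances_marked_deleted() is consumed: the shrink paths later in this
# section poll it until every removed instance is flagged deleted or a
# timeout fires. The sleep_time and time_out values mirror those call sites.
try:
    utils.poll_until(all_instances_marked_deleted,
                     sleep_time=2,
                     time_out=CONF.cluster_delete_time_out)
except PollTimeOut:
    LOG.error("timeout for instances to be marked as deleted.")
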
def _find_query_router_ids(self, tenant_id, cluster_id):
    args = {
        'tenant_id': tenant_id,
        'cluster_id': cluster_id,
        'type': 'query_router'
    }
    query_router_instances = DBInstance.find_all(**args).all()
    return [db_instance.id for db_instance in query_router_instances]

def _find_cluster_node_ids(self, tenant_id, cluster_id):
    args = {
        'tenant_id': tenant_id,
        'cluster_id': cluster_id,
        'deleted': False
    }
    cluster_instances = DBInstance.find_all(**args).all()
    return [db_instance.id for db_instance in cluster_instances]

def _create_cluster():
    # fetch instances by cluster_id against instances table
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in cluster %s: %s" % (cluster_id, instance_ids))

    if not self._all_instances_ready(instance_ids, cluster_id):
        return

    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    # filter query routers in instances into a new list: query_routers
    query_routers = [instance for instance in instances
                     if instance.type == 'query_router']
    LOG.debug("query routers: %s" %
              [instance.id for instance in query_routers])

    # filter config servers in instances into new list: config_servers
    config_servers = [instance for instance in instances
                      if instance.type == 'config_server']
    LOG.debug("config servers: %s" %
              [instance.id for instance in config_servers])

    # filter members (non router/configsvr) into a new list: members
    members = [instance for instance in instances
               if instance.type == 'member']
    LOG.debug("members: %s" % [instance.id for instance in members])

    # for config_server in config_servers, append ip/hostname to
    # "config_server_hosts", then
    # peel off the replica-set name and ip/hostname from 'x'
    config_server_ips = [self.get_ip(instance)
                         for instance in config_servers]
    LOG.debug("config server ips: %s" % config_server_ips)

    LOG.debug("calling add_config_servers on query_routers")
    try:
        for query_router in query_routers:
            (self.get_guest(query_router)
             .add_config_servers(config_server_ips))
    except Exception:
        LOG.exception(_("error adding config servers"))
        self.update_statuses_on_failure(cluster_id)
        return

    if not self._create_replica_set(members, cluster_id):
        return

    replica_set_name = "rs1"
    if not self._create_shard(query_routers, replica_set_name,
                              members, cluster_id):
        return

    # call to start checking status
    for instance in instances:
        self.get_guest(instance).cluster_complete()

def _get_cluster_instance_id(self, tenant_id, cluster_id):
    args = {'tenant_id': tenant_id, 'cluster_id': cluster_id}
    cluster_instances = DBInstance.find_all(**args).all()
    instance_ids = [db_instance.id for db_instance in cluster_instances]
    args = {'tenant_id': tenant_id, 'cluster_id': cluster_id,
            'type': 'master'}
    master_instance = DBInstance.find_by(**args)
    master_instance_id = master_instance.id
    return master_instance_id, instance_ids

def check_resize(group_id):
    resize = InstanceTasks.RESIZING
    instances = DBInstance.find_all(group_id=group_id,
                                    task_id=resize.code())
    insts = []
    for inst in instances:
        insts.append(inst)
        check_server_status(inst.id)
    return insts

def _get_slaves(tenant_id, instance_or_cluster_id, deleted=False):
    LOG.info("Getting non-deleted slaves of instance '%s', "
             "if any.", instance_or_cluster_id)
    args = {'slave_of_id': instance_or_cluster_id,
            'tenant_id': tenant_id,
            'deleted': deleted}
    db_infos = DBInstance.find_all(**args)
    slaves = []
    for db_info in db_infos:
        slaves.append(db_info.id)
    return slaves

def update_statuses_on_failure(self, cluster_id):
    if CONF.update_status_on_fail:
        db_instances = DBInstance.find_all(
            cluster_id=cluster_id, deleted=False).all()
        for db_instance in db_instances:
            db_instance.set_task_status(
                InstanceTasks.BUILDING_ERROR_SERVER)
            db_instance.save()

def grow_cluster(self, context, cluster_id, new_instance_ids):
    """Grow a K2hdkc Cluster."""
    LOG.debug("Begins grow_cluster for %(cluster_id)s. "
              "new_instance_ids: %(new_instance_ids)s",
              {'cluster_id': cluster_id,
               'new_instance_ids': new_instance_ids})

    # 1. validates args
    if context is None:
        LOG.error("no context")
        return
    if cluster_id is None:
        LOG.error("no cluster_id")
        return
    if new_instance_ids is None:
        LOG.error("no new_instance_ids")
        return

    timeout = Timeout(CONF.cluster_usage_timeout)
    try:
        # 2. Retrieves db_instances from the database
        db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                           deleted=False).all()
        LOG.debug("len(db_instances) %s", len(db_instances))
        # 3. Checks if new instances are ready
        if not self._all_instances_running(new_instance_ids, cluster_id):
            LOG.error("instances are not ready yet")
            return
        # 4. Loads instances
        instances = [Instance.load(context, instance_id)
                     for instance_id in new_instance_ids]
        LOG.debug("len(instances) %s", len(instances))
        # 5. Instantiates GuestAgent for each instance
        # 6. Calls cluster_complete endpoint of K2hdkcGuestAgent
        LOG.debug("Calling cluster_complete as a final hook to "
                  "each node in the cluster")
        for instance in instances:
            self.get_guest(instance).cluster_complete()
        # 7. reset the current cluster task status to None
        LOG.debug("reset cluster task to None")
        self.reset_task()
    except Timeout as t:
        # Note: administrators should reset the task via CLI in this case.
        if t is not timeout:
            raise  # not my timeout
        LOG.exception("Timeout for growing cluster.")
        self.update_statuses_on_failure(
            cluster_id, status=inst_tasks.InstanceTasks.GROWING_ERROR)
    finally:
        timeout.cancel()

    LOG.debug("Completed grow_cluster for %s.", cluster_id)

def _create_cluster():
    # fetch instances by cluster_id against instances table
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in cluster %s: %s" % (cluster_id, instance_ids))

    if not self._all_instances_ready(instance_ids, cluster_id):
        return

    LOG.debug("all instances in cluster %s ready." % cluster_id)

    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    # filter query routers in instances into a new list: query_routers
    query_routers = [instance for instance in instances
                     if instance.type == 'query_router']
    LOG.debug("query routers: %s" %
              [instance.id for instance in query_routers])

    # filter config servers in instances into new list: config_servers
    config_servers = [instance for instance in instances
                      if instance.type == 'config_server']
    LOG.debug("config servers: %s" %
              [instance.id for instance in config_servers])

    # filter members (non router/configsvr) into a new list: members
    members = [instance for instance in instances
               if instance.type == 'member']
    LOG.debug("members: %s" % [instance.id for instance in members])

    # for config_server in config_servers, append ip/hostname to
    # "config_server_hosts", then
    # peel off the replica-set name and ip/hostname from 'x'
    config_server_ips = [self.get_ip(instance)
                         for instance in config_servers]
    LOG.debug("config server ips: %s" % config_server_ips)

    if not self._add_query_routers(query_routers, config_server_ips):
        return

    if not self._create_shard(query_routers[0], members):
        return

    # call to start checking status
    for instance in instances:
        self.get_guest(instance).cluster_complete()

def _shrink_cluster():
    removal_instances = [Instance.load(context, instance_id)
                         for instance_id in removal_instance_ids]
    for instance in removal_instances:
        Instance.delete(instance)

    # wait for instances to be deleted
    def all_instances_marked_deleted():
        non_deleted_instances = DBInstance.find_all(
            cluster_id=cluster_id, deleted=False).all()
        non_deleted_ids = [db_instance.id
                           for db_instance in non_deleted_instances]
        return not bool(
            set(removal_instance_ids).intersection(set(non_deleted_ids)))

    try:
        LOG.info("Deleting instances (%s)", removal_instance_ids)
        utils.poll_until(all_instances_marked_deleted,
                         sleep_time=2,
                         time_out=CONF.cluster_delete_time_out)
    except PollTimeOut:
        LOG.error("timeout for instances to be marked as deleted.")
        return

    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    leftover_instances = [Instance.load(context, db_inst.id)
                          for db_inst in db_instances
                          if db_inst.id not in removal_instance_ids]
    leftover_cluster_ips = [self.get_ip(instance)
                            for instance in leftover_instances]

    # Get config changes for left over instances
    rnd_cluster_guest = self.get_guest(leftover_instances[0])
    cluster_context = rnd_cluster_guest.get_cluster_context()

    # apply the new config to all leftover instances
    for instance in leftover_instances:
        guest = self.get_guest(instance)
        # render the conf.d/cluster.cnf configuration
        cluster_configuration = self._render_cluster_config(
            context, instance, ",".join(leftover_cluster_ips),
            cluster_context['cluster_name'],
            cluster_context['replication_user'])
        guest.write_cluster_configuration_overrides(cluster_configuration)

def load(cls):
    # TODO(pdmars): This should probably be changed to a more generic
    # database filter query if one is added, however, this should suffice
    # for now.
    db_infos = DBInstance.find_all(deleted=False)
    tenant_ids_for_instances = [db_info.tenant_id for db_info in db_infos]
    tenant_ids = set(tenant_ids_for_instances)
    LOG.debug("All tenants with instances: %s" % tenant_ids)
    accounts = []
    for tenant_id in tenant_ids:
        num_instances = tenant_ids_for_instances.count(tenant_id)
        accounts.append({'id': tenant_id, 'num_instances': num_instances})
    return cls(accounts)

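# A sketch of an equivalent one-pass version of the counting loop above.
# list.count() inside the loop rescans tenant_ids_for_instances once per
# tenant; collections.Counter produces the same per-tenant totals in a
# single pass. Illustrative alternative, assuming the same db_infos
# iterable; the helper name is hypothetical.
from collections import Counter

def _accounts_from_db_infos(db_infos):
    counts = Counter(db_info.tenant_id for db_info in db_infos)
    return [{'id': tenant_id, 'num_instances': num_instances}
            for tenant_id, num_instances in counts.items()]
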
def _create_cluster():
    # Fetch instances by cluster_id against instances table.
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    instance_ids = [db_instance.id for db_instance in db_instances]

    # Wait for cluster members to get to cluster-ready status.
    if not self._all_instances_ready(instance_ids, cluster_id):
        return

    LOG.debug("All members ready, proceeding for cluster setup.")
    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    member_ips = [self.get_ip(instance) for instance in instances]
    guests = [self.get_guest(instance) for instance in instances]

    # Users to be configured for password-less SSH.
    authorized_users_without_password = ['root', 'dbadmin']

    # Configuring password-less SSH for cluster members.
    # Strategy for setting up SSH:
    # get public keys for user from member-instances in cluster,
    # combine them, finally push it back to all instances,
    # and member instances add them to authorized keys.
    LOG.debug("Configuring password-less SSH on cluster members.")
    try:
        for user in authorized_users_without_password:
            pub_key = [guest.get_public_keys(user) for guest in guests]
            for guest in guests:
                guest.authorize_public_keys(user, pub_key)

        LOG.debug("Installing cluster with members: %s." % member_ips)
        for db_instance in db_instances:
            if db_instance['type'] == 'master':
                master_instance = Instance.load(context, db_instance.id)
                self.get_guest(master_instance).install_cluster(member_ips)
                break

        LOG.debug("Finalizing cluster configuration.")
        for guest in guests:
            guest.cluster_complete()
    except Exception:
        LOG.exception(_("Error creating cluster."))
        self.update_statuses_on_failure(cluster_id)

def create_cluster(self, context, cluster_id):
    """Create K2hdkcClusterTasks.

    This function is called in trove.taskmanager.Manager.create_cluster.
    """
    LOG.debug("Begins create_cluster for %s.", cluster_id)

    # 1. validates args
    if context is None:
        LOG.error("no context")
        return
    if cluster_id is None:
        LOG.error("no cluster_id")
        return

    timeout = Timeout(CONF.cluster_usage_timeout)
    LOG.debug("CONF.cluster_usage_timeout %s.", timeout)
    try:
        # 2. Retrieves db_instances from the database
        db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                           deleted=False).all()
        # 3. Retrieves instance ids from the db_instances
        instance_ids = [db_instance.id for db_instance in db_instances]
        # 4. Checks if instances are ready
        if not self._all_instances_running(instance_ids, cluster_id):
            LOG.error("instances are not ready yet")
            return
        # 5. Loads instances
        instances = [Instance.load(context, instance_id)
                     for instance_id in instance_ids]
        # 6. Instantiates GuestAgent for each guest instance
        # 7. Calls cluster_complete endpoint of K2hdkcGuestAgent
        for instance in instances:
            self.get_guest(instance).cluster_complete()
        # 8. reset the current cluster task status to None
        LOG.debug("reset cluster task to None")
        self.reset_task()
    except Timeout as t:
        # Note: administrators should reset the task via CLI in this case.
        if t is not timeout:
            raise  # not my timeout
        LOG.exception("Timeout for building cluster.")
        self.update_statuses_on_failure(cluster_id)
    finally:
        timeout.cancel()

    LOG.debug("Completed create_cluster for %s.", cluster_id)

def _shrink_cluster():
    removal_instances = [Instance.load(context, instance_id)
                         for instance_id in removal_instance_ids]
    for instance in removal_instances:
        Instance.delete(instance)

    # wait for instances to be deleted
    def all_instances_marked_deleted():
        non_deleted_instances = DBInstance.find_all(
            cluster_id=cluster_id, deleted=False).all()
        non_deleted_ids = [db_instance.id
                           for db_instance in non_deleted_instances]
        return not bool(
            set(removal_instance_ids).intersection(set(non_deleted_ids)))

    try:
        LOG.info(_("Deleting instances (%s)") % removal_instance_ids)
        utils.poll_until(all_instances_marked_deleted,
                         sleep_time=2,
                         time_out=CONF.cluster_delete_time_out)
    except PollTimeOut:
        LOG.error(_("timeout for instances to be marked as deleted."))
        return

    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    leftover_instances = [Instance.load(context, db_inst.id)
                          for db_inst in db_instances
                          if db_inst.id not in removal_instance_ids]
    leftover_cluster_ips = [self.get_ip(instance)
                            for instance in leftover_instances]

    # Get config changes for left over instances
    rnd_cluster_guest = self.get_guest(leftover_instances[0])
    cluster_context = rnd_cluster_guest.get_cluster_context()

    # apply the new config to all leftover instances
    for instance in leftover_instances:
        guest = self.get_guest(instance)
        # render the conf.d/cluster.cnf configuration
        cluster_configuration = self._render_cluster_config(
            context, instance, ",".join(leftover_cluster_ips),
            cluster_context['cluster_name'],
            cluster_context['replication_user'])
        guest.write_cluster_configuration_overrides(cluster_configuration)

def shrink(self, instances):
    """Removes instances from a cluster."""
    LOG.debug("Shrinking cluster %s." % self.id)

    self.validate_cluster_available()
    removal_instances = [Instance.load(self.context, inst_id)
                         for inst_id in instances]
    db_instances = DBInstance.find_all(cluster_id=self.db_info.id).all()
    if len(db_instances) - len(removal_instances) < 1:
        raise exception.ClusterShrinkMustNotLeaveClusterEmpty()

    self.db_info.update(task_status=ClusterTasks.SHRINKING_CLUSTER)
    task_api.load(self.context, self.ds_version.manager).shrink_cluster(
        self.db_info.id,
        [instance.id for instance in removal_instances])

    return PXCCluster(self.context, self.db_info, self.ds, self.ds_version)

def _grow_cluster():
    LOG.debug("begin grow_cluster for Vertica cluster %s" % cluster_id)

    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    instance_ids = [db_instance.id for db_instance in db_instances]

    # Wait for new cluster members to get to cluster-ready status.
    if not self._all_instances_ready(new_instance_ids, cluster_id):
        return

    new_insts = [Instance.load(context, instance_id)
                 for instance_id in new_instance_ids]

    existing_instances = [Instance.load(context, instance_id)
                          for instance_id in instance_ids
                          if instance_id not in new_instance_ids]

    existing_guests = [self.get_guest(i) for i in existing_instances]
    new_guests = [self.get_guest(i) for i in new_insts]
    all_guests = new_guests + existing_guests

    authorized_users_without_password = ['root', 'dbadmin']
    new_ips = [self.get_ip(instance) for instance in new_insts]

    for user in authorized_users_without_password:
        pub_key = [guest.get_public_keys(user) for guest in all_guests]
        for guest in all_guests:
            guest.authorize_public_keys(user, pub_key)

    for db_instance in db_instances:
        if db_instance['type'] == 'master':
            LOG.debug("Found 'master' instance, calling grow on guest")
            master_instance = Instance.load(context, db_instance.id)
            self.get_guest(master_instance).grow_cluster(new_ips)
            break

    for guest in new_guests:
        guest.cluster_complete()

def _create_cluster():
    # Fetch instances by cluster_id against instances table.
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]

    # Wait for cluster members to get to cluster-ready status.
    if not self._all_instances_ready(instance_ids, cluster_id):
        return

    LOG.debug("All members ready, proceeding for cluster setup.")
    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    # Connect nodes to the first node
    guests = [self.get_guest(instance) for instance in instances]
    try:
        cluster_head = instances[0]
        cluster_head_port = '6379'
        cluster_head_ip = self.get_ip(cluster_head)
        for guest in guests[1:]:
            guest.cluster_meet(cluster_head_ip, cluster_head_port)

        num_nodes = len(instances)
        total_slots = 16384
        # Integer division keeps the slot boundaries ints; plain '/'
        # would produce floats under Python 3.
        slots_per_node = total_slots // num_nodes
        leftover_slots = total_slots % num_nodes
        first_slot = 0
        for guest in guests:
            last_slot = first_slot + slots_per_node
            if leftover_slots > 0:
                leftover_slots -= 1
            else:
                last_slot -= 1
            guest.cluster_addslots(first_slot, last_slot)
            first_slot = last_slot + 1

        for guest in guests:
            guest.cluster_complete()
    except Exception:
        LOG.exception(_("Error creating cluster."))
        self.update_statuses_on_failure(cluster_id)

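# A standalone, runnable sketch of the slot-partitioning arithmetic used
# above: the 16384 Redis hash slots are tiled into contiguous ranges, and
# the first (total % n) nodes each absorb one leftover slot. The function
# name is hypothetical; the asserts check that the ranges cover the slot
# space exactly once.
def partition_slots(num_nodes, total_slots=16384):
    slots_per_node = total_slots // num_nodes
    leftover_slots = total_slots % num_nodes
    ranges = []
    first_slot = 0
    for _ in range(num_nodes):
        last_slot = first_slot + slots_per_node
        if leftover_slots > 0:
            leftover_slots -= 1
        else:
            last_slot -= 1
        ranges.append((first_slot, last_slot))
        first_slot = last_slot + 1
    return ranges

ranges = partition_slots(3)
assert ranges[0] == (0, 5461)  # first node absorbs the extra slot
assert ranges[-1][1] == 16383  # ranges end at the last slot
assert sum(hi - lo + 1 for lo, hi in ranges) == 16384
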
def upgrade(migrate_engine):
    meta.bind = migrate_engine
    instance_table = Table('instances', meta, autoload=True)

    session.configure_db(CONF)

    instances = DBInstance.find_all(datastore_version_id=None)
    if instances.count() > 0:
        datastore = DBDatastore.get_by(manager="mysql")
        datastore = datastore or DBDatastore.create(
            name="Legacy MySQL",
            manager="mysql",
        )

        image_table = Table('service_images', meta, autoload=True)
        image = select(
            columns=["id", "image_id", "service_name"],
            from_obj=image_table,
            whereclause="service_name='mysql'",
            limit=1
        ).execute().fetchone()

        image_id = "00000000-0000-0000-0000-000000000000"
        if image:
            image_id = image.image_id

        version = DBDatastoreVersion.create(
            datastore_id=datastore.id,
            name="Unknown Legacy Version",
            image_id=image_id,
            active=False,
        )

        for instance in instances:
            instance.update_db(datastore_version_id=version.id)

    instance_table.c.datastore_version_id.alter(nullable=False)

def _create_cluster():
    # fetch instances by cluster_id against instances table
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("instances in cluster %s: %s" % (cluster_id, instance_ids))

    if not self._all_instances_ready(instance_ids, cluster_id):
        return

    LOG.debug("all instances in cluster %s ready." % cluster_id)

    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    # filter query routers in instances into a new list: query_routers
    query_routers = [instance for instance in instances
                     if instance.type == 'query_router']
    LOG.debug("query routers: %s" %
              [instance.id for instance in query_routers])

    # filter config servers in instances into new list: config_servers
    config_servers = [instance for instance in instances
                      if instance.type == 'config_server']
    LOG.debug("config servers: %s" %
              [instance.id for instance in config_servers])

    # filter members (non router/configsvr) into a new list: members
    members = [instance for instance in instances
               if instance.type == 'member']
    LOG.debug("members: %s" % [instance.id for instance in members])

    # for config_server in config_servers, append ip/hostname to
    # "config_server_hosts", then
    # peel off the replica-set name and ip/hostname from 'x'
    config_server_ips = [self.get_ip(instance)
                         for instance in config_servers]
    LOG.debug("config server ips: %s" % config_server_ips)

    # Give the query routers the configsvr ips to connect to.
    # Create the admin user on the query routers.
    # The first will create the user, and the others will just reset
    # the password to the same value.
    LOG.debug("calling add_config_servers on, and sending admin user "
              "password to, query_routers")
    try:
        admin_created = False
        admin_password = utils.generate_random_password()
        for query_router in query_routers:
            guest = self.get_guest(query_router)
            guest.add_config_servers(config_server_ips)
            if admin_created:
                guest.store_admin_password(admin_password)
            else:
                guest.create_admin_user(admin_password)
                admin_created = True
    except Exception:
        LOG.exception(_("error adding config servers"))
        self.update_statuses_on_failure(cluster_id)
        return

    if not self._create_replica_set(members, cluster_id):
        return

    replica_set_name = "rs1"
    if not self._create_shard(query_routers, replica_set_name,
                              members, cluster_id):
        return

    # call to start checking status
    for instance in instances:
        self.get_guest(instance).cluster_complete()

def _grow_cluster():
    db_instances = DBInstance.find_all(
        cluster_id=cluster_id, deleted=False).all()
    existing_instances = [Instance.load(context, db_inst.id)
                          for db_inst in db_instances
                          if db_inst.id not in new_instance_ids]
    if not existing_instances:
        raise TroveError(_("Unable to determine existing cluster "
                           "member(s)"))

    # get list of ips of existing cluster members
    existing_cluster_ips = [self.get_ip(instance)
                            for instance in existing_instances]
    existing_instance_guests = [self.get_guest(instance)
                                for instance in existing_instances]

    # get the cluster context to setup new members
    cluster_context = existing_instance_guests[0].get_cluster_context()

    # Wait for cluster members to get to cluster-ready status.
    if not self._all_instances_ready(new_instance_ids, cluster_id):
        raise TroveError(_("Instances in cluster did not report "
                           "ACTIVE"))

    LOG.debug("All members ready, proceeding for cluster setup.")

    # Get the new instances to join the cluster
    new_instances = [Instance.load(context, instance_id)
                     for instance_id in new_instance_ids]
    new_cluster_ips = [self.get_ip(instance)
                       for instance in new_instances]
    for instance in new_instances:
        guest = self.get_guest(instance)

        guest.reset_admin_password(cluster_context['admin_password'])

        # render the conf.d/cluster.cnf configuration
        cluster_configuration = self._render_cluster_config(
            context, instance, ",".join(existing_cluster_ips),
            cluster_context['cluster_name'],
            cluster_context['replication_user'])

        # push the cluster config and bootstrap the first instance
        bootstrap = False
        guest.install_cluster(cluster_context['replication_user'],
                              cluster_configuration, bootstrap)

    self._check_cluster_for_root(context, existing_instances,
                                 new_instances)

    # apply the new config to all instances
    for instance in existing_instances + new_instances:
        guest = self.get_guest(instance)
        # render the conf.d/cluster.cnf configuration
        cluster_configuration = self._render_cluster_config(
            context, instance,
            ",".join(existing_cluster_ips + new_cluster_ips),
            cluster_context['cluster_name'],
            cluster_context['replication_user'])
        guest.write_cluster_configuration_overrides(cluster_configuration)

    for instance in new_instances:
        guest = self.get_guest(instance)
        guest.cluster_complete()

def _find_cluster_node_ids(self, tenant_id, cluster_id):
    args = {'tenant_id': tenant_id, 'cluster_id': cluster_id}
    cluster_instances = DBInstance.find_all(**args).all()
    return [db_instance.id for db_instance in cluster_instances]

def find_cluster_node_ids(cls, cluster_id):
    db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                       deleted=False).all()
    return [db_instance.id for db_instance in db_instances]

def shrink_cluster(self, context, cluster_id, removal_ids):
    """Shrink a K2hdkc Cluster."""
    LOG.debug("Begins shrink_cluster for %(cluster_id)s. "
              "removal_ids: %(removal_ids)s",
              {'cluster_id': cluster_id, 'removal_ids': removal_ids})

    # 1. validates args
    if context is None:
        LOG.error("no context")
        return
    if cluster_id is None:
        LOG.error("no cluster_id")
        return
    if removal_ids is None:
        LOG.error("no removal_ids")
        return

    timeout = Timeout(CONF.cluster_usage_timeout)
    try:
        # 2. Retrieves db_instances from the database
        db_instances = DBInstance.find_all(cluster_id=cluster_id,
                                           deleted=False).all()
        # 3. Retrieves instance ids from the db_instances
        instance_ids = [db_instance.id for db_instance in db_instances]
        # 4. Checks if instances are running
        if not self._all_instances_running(instance_ids, cluster_id):
            LOG.error("instances are not ready yet")
            return
        # 5. Loads the instances being removed
        instances = [Instance.load(context, instance_id)
                     for instance_id in removal_ids]
        LOG.debug("len(instances) %s", len(instances))
        # 6. Instantiates GuestAgent for each instance
        # 6.2. Checks if the instances being removed have shut down
        # if not self._all_instances_shutdown(removal_ids, cluster_id):
        #     LOG.error("removing instances are not shutdown yet")
        #     return
        # 7. Calls cluster_complete endpoint of K2hdkcGuestAgent
        LOG.debug("Calling cluster_complete as a final hook to "
                  "each node in the cluster")
        for instance in instances:
            self.get_guest(instance).cluster_complete()
        # 8. delete node from OpenStack
        LOG.debug("delete node from OpenStack")
        for instance in instances:
            Instance.delete(instance)
        # 9. reset the current cluster task status to None
        LOG.debug("reset cluster task to None")
        self.reset_task()
    except Timeout as t:
        # Note: administrators should reset the task via CLI in this case.
        if t is not timeout:
            raise  # not my timeout
        LOG.exception("Timeout for shrink cluster.")
        self.update_statuses_on_failure(
            cluster_id, status=inst_tasks.InstanceTasks.SHRINKING_ERROR)
    finally:
        timeout.cancel()

    LOG.debug("Completed shrink_cluster for %s.", cluster_id)

def _create_cluster():
    # Fetch instances by cluster_id against instances table.
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    instance_ids = [db_instance.id for db_instance in db_instances]
    LOG.debug("Waiting for instances to get to cluster-ready status.")

    # Wait for cluster members to get to cluster-ready status.
    if not self._all_instances_ready(instance_ids, cluster_id):
        raise TroveError(_("Instances in cluster did not report "
                           "ACTIVE"))

    LOG.debug("All members ready, proceeding for cluster setup.")
    instances = [Instance.load(context, instance_id)
                 for instance_id in instance_ids]

    cluster_ips = [self.get_ip(instance) for instance in instances]
    instance_guests = [self.get_guest(instance)
                       for instance in instances]

    # Create replication user and password for synchronizing the
    # galera cluster
    replication_user = {
        "name": self.CLUSTER_REPLICATION_USER,
        "password": utils.generate_random_password(),
    }

    # Galera cluster name must be unique and be shorter than a full
    # uuid string so we remove the hyphens and chop it off. It was
    # recommended to be 16 chars or less.
    # (this is not currently documented on Galera docs)
    cluster_name = utils.generate_uuid().replace("-", "")[:16]

    LOG.debug("Configuring cluster configuration.")
    try:
        # Set the admin password for all the instances because the
        # password in the my.cnf will be wrong after the joiner
        # instances sync with the donor instance.
        admin_password = str(utils.generate_random_password())
        for guest in instance_guests:
            guest.reset_admin_password(admin_password)

        bootstrap = True
        for instance in instances:
            guest = self.get_guest(instance)

            # render the conf.d/cluster.cnf configuration
            cluster_configuration = self._render_cluster_config(
                context, instance, ",".join(cluster_ips),
                cluster_name, replication_user)

            # push the cluster config and bootstrap the first instance
            guest.install_cluster(replication_user,
                                  cluster_configuration, bootstrap)
            bootstrap = False

        LOG.debug("Finalizing cluster configuration.")
        for guest in instance_guests:
            guest.cluster_complete()
    except Exception:
        LOG.exception(_("Error creating cluster."))
        self.update_statuses_on_failure(cluster_id)

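# A quick illustration of the cluster-name derivation above, written against
# the stdlib rather than trove's utils wrapper (assumption:
# utils.generate_uuid() returns a standard uuid4 string). Hyphens are
# stripped and the value truncated to 16 hex characters to satisfy the
# short-name recommendation noted in the comment above.
import uuid

cluster_name = str(uuid.uuid4()).replace("-", "")[:16]
assert len(cluster_name) == 16
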
def _get_cluster_instance_id(self, tenant_id, cluster_id):
    args = {'tenant_id': tenant_id, 'cluster_id': cluster_id}
    cluster_instances = DBInstance.find_all(**args).all()
    instance_ids = [db_instance.id for db_instance in cluster_instances]
    instance_id = instance_ids[0]
    return (instance_id, instance_ids)

def load(context, id):
    db_infos = DBInstance.find_all(tenant_id=id, deleted=False)
    instance_ids = []
    for db_info in db_infos:
        instance_ids.append(db_info.id)
    return Account(id, instance_ids)

def find_cluster_node_ids(cls, cluster_id):
    db_instances = DBInstance.find_all(cluster_id=cluster_id).all()
    return [db_instance.id for db_instance in db_instances]
