def tearDown(self):
    """Undo the ``dbaas`` patches and remove the fake status record.

    Runs the base-class ``tearDown`` first, then reassigns the patched
    ``dbaas`` attributes from the ``orig_*`` values held on ``self``
    (presumably saved by ``setUp`` -- not visible here), deletes the
    ``InstanceServiceStatus`` record found for ``FAKE_ID`` and resets
    ``dbaas.CONF.guest_id`` to ``None``.
    """
    super(MySqlAppStatusTest, self).tearDown()

    # Put back every attribute of dbaas that was replaced for the test.
    dbaas.utils.execute_with_timeout = self.orig_utils_execute_with_timeout
    dbaas.load_mysqld_options = self.orig_load_mysqld_options
    dbaas.os.path.exists = self.orig_dbaas_os_path_exists
    dbaas.time.sleep = self.orig_dbaas_time_sleep

    # Drop the status record that belongs to the fake instance.
    fake_status = InstanceServiceStatus.find_by(instance_id=self.FAKE_ID)
    fake_status.delete()

    dbaas.CONF.guest_id = None
def tearDown(self):
    """Undo the ``dbaas`` patches and remove the fake status record.

    Runs the base-class ``tearDown`` first, then reassigns the patched
    ``dbaas`` attributes from the ``orig_*`` values held on ``self``
    (presumably saved by ``setUp`` -- not visible here), deletes the
    ``InstanceServiceStatus`` record found for ``FAKE_ID`` and resets
    ``dbaas.CONF.guest_id`` to ``None``.
    """
    super(MySqlAppStatusTest, self).tearDown()

    # Put back every attribute of dbaas that was replaced for the test.
    dbaas.utils.execute_with_timeout = self.orig_utils_execute_with_timeout
    dbaas.load_mysqld_options = self.orig_load_mysqld_options
    dbaas.os.path.exists = self.orig_dbaas_os_path_exists
    dbaas.time.sleep = self.orig_dbaas_time_sleep

    # Drop the status record that belongs to the fake instance.
    fake_status = InstanceServiceStatus.find_by(instance_id=self.FAKE_ID)
    fake_status.delete()

    dbaas.CONF.guest_id = None
def update_statuses_on_time_out(self):
    """Record a guest-agent timeout on the service and on the instance.

    Does nothing unless ``CONF.update_status_on_fail`` is truthy.  When it
    is, the service status record for ``self.id`` is set to
    ``FAILED_TIMEOUT_GUESTAGENT`` and saved, then the ``DBInstance`` found
    by ``self.name`` gets the ``BUILDING_ERROR_TIMEOUT_GA`` task status and
    is saved.  Each update is followed by two ``LOG.error`` messages.
    """
    if not CONF.update_status_on_fail:
        return

    # Updating service status
    service = InstanceServiceStatus.find_by(instance_id=self.id)
    timeout_status = ServiceStatuses.FAILED_TIMEOUT_GUESTAGENT
    service.set_status(timeout_status)
    service.save()
    LOG.error(_("Service status: %(status)s") %
              {'status': timeout_status.api_status})
    LOG.error(_("Service error description: %(desc)s") %
              {'desc': timeout_status.description})

    # Updating instance status
    db_info = DBInstance.find_by(name=self.name)
    timeout_task = InstanceTasks.BUILDING_ERROR_TIMEOUT_GA
    db_info.set_task_status(timeout_task)
    db_info.save()
    LOG.error(_("Trove instance status: %(action)s") %
              {'action': timeout_task.action})
    LOG.error(_("Trove instance status description: %(text)s") %
              {'text': timeout_task.db_text})
def monitor_ha(self, context):
    """Monitors the status of MySQL masters to make sure they are up.

    Every non-deleted instance that has a ``slave_of_id`` and whose task
    status is ``InstanceTasks.NONE`` is paired with its master id.  For
    each pair where ``_engage_failover`` returns a truthy value for the
    master's service status, the replica is detached from its replica
    source, the DNS records are updated and the resulting hostnames are
    written back to trove.

    :param context: not referenced by this method.
    """
    LOG.debug("Monitoring Trove Replica Sources (Masters)")
    db_infos = t_models.DBInstance.find_all(deleted=False)
    masters_to_watch = [(instance.slave_of_id, instance)
                        for instance in db_infos.all()
                        if instance.slave_of_id and
                        instance.task_status == InstanceTasks.NONE]
    LOG.debug("Monitoring %s", masters_to_watch)

    for master_id, slave in masters_to_watch:
        # The master record is fetched once per pair; the original code
        # repeated this exact query again inside the failover branch.
        master = t_models.DBInstance.find_by(deleted=False, id=master_id)
        service = InstanceServiceStatus.find_by(instance_id=master_id)
        if not self._engage_failover(service):
            continue

        LOG.debug("Engage FAILOVER from %s to %s NOW!",
                  master_id, slave.id)
        self._create_and_auth_clients()

        # Failover Slave to Master by detaching replica source on slave
        self.trove_client.instances.edit(slave.id,
                                         detach_replica_source=True)

        # Update DNS records for master and slave
        master_dns, slave_dns = self._update_dns_records(master, slave)

        # Finally update the hostnames in trove to
        # reflect the updated DNS information
        self._reflect_dns_updates_in_trove(master, master_dns,
                                           slave, slave_dns)
def update_statuses_on_time_out(self):
    """Record a guest-agent timeout on the service and on the instance.

    Does nothing unless ``CONF.update_status_on_fail`` is truthy.  When it
    is, the service status record for ``self.id`` is set to
    ``FAILED_TIMEOUT_GUESTAGENT`` and saved, then the ``DBInstance`` found
    by ``self.name`` gets the ``BUILDING_ERROR_TIMEOUT_GA`` task status and
    is saved.  Each update is followed by two ``LOG.error`` messages.
    """
    if not CONF.update_status_on_fail:
        return

    # Updating service status
    service = InstanceServiceStatus.find_by(instance_id=self.id)
    timeout_status = ServiceStatuses.FAILED_TIMEOUT_GUESTAGENT
    service.set_status(timeout_status)
    service.save()
    LOG.error(_("Service status: %(status)s") %
              {'status': timeout_status.api_status})
    LOG.error(_("Service error description: %(desc)s") %
              {'desc': timeout_status.description})

    # Updating instance status
    db_info = DBInstance.find_by(name=self.name)
    timeout_task = InstanceTasks.BUILDING_ERROR_TIMEOUT_GA
    db_info.set_task_status(timeout_task)
    db_info.save()
    LOG.error(_("Trove instance status: %(action)s") %
              {'action': timeout_task.action})
    LOG.error(_("Trove instance status description: %(text)s") %
              {'text': timeout_task.db_text})
def _service_is_active(self):
    """Tell whether the database guest has reached the RUNNING state.

    Meant to be used with ``poll_until`` so the guest is known to be alive
    before a 'create' message is sent, which avoids billing a customer for
    an instance they can never use.

    Returns:
        True when the service status is RUNNING; False when it is still
        NEW or BUILDING and the nova server is neither ERROR nor FAILED.

    Raises:
        TroveError if the service status is anything other than RUNNING,
        NEW or BUILDING, or if the nova server status is ERROR or FAILED.
    """
    guest_status = InstanceServiceStatus.find_by(
        instance_id=self.id).get_status()
    if guest_status == rd_instance.ServiceStatuses.RUNNING:
        return True

    # Only these two states mean "keep waiting"; anything else is fatal.
    still_starting = (rd_instance.ServiceStatuses.NEW,
                      rd_instance.ServiceStatuses.BUILDING)
    if guest_status not in still_starting:
        raise TroveError(_("Service not active, status: %s") % guest_status)

    # The guest is still coming up: make sure the server itself is healthy.
    compute_id = self.db_info.compute_instance_id
    server_status = self.nova_client.servers.get(compute_id).status
    if server_status in (InstanceStatus.ERROR, InstanceStatus.FAILED):
        raise TroveError(_("Server not active, status: %s") % server_status)
    return False
def _service_is_active(self):
    """Tell whether the database guest has reached the RUNNING state.

    Meant to be used with ``poll_until`` so the guest is known to be alive
    before a 'create' message is sent, which avoids billing a customer for
    an instance they can never use.

    Returns:
        True when the service status is RUNNING; False when it is still
        NEW or BUILDING and the nova server is neither ERROR nor FAILED.

    Raises:
        TroveError if the service status is anything other than RUNNING,
        NEW or BUILDING, or if the nova server status is ERROR or FAILED.
    """
    guest_status = InstanceServiceStatus.find_by(
        instance_id=self.id).get_status()
    if guest_status == rd_instance.ServiceStatuses.RUNNING:
        return True

    # Only these two states mean "keep waiting"; anything else is fatal.
    still_starting = (rd_instance.ServiceStatuses.NEW,
                      rd_instance.ServiceStatuses.BUILDING)
    if guest_status not in still_starting:
        raise TroveError(_("Service not active, status: %s") % guest_status)

    # The guest is still coming up: make sure the server itself is healthy.
    compute_id = self.db_info.compute_instance_id
    server_status = self.nova_client.servers.get(compute_id).status
    if server_status in (InstanceStatus.ERROR, InstanceStatus.FAILED):
        raise TroveError(_("Server not active, status: %s") % server_status)
    return False
def _set_status(self, new_status="RUNNING"):
    """Store a new service status on this instance's status record.

    :param new_status: key of the status to store, "RUNNING" or
        "SHUTDOWN".  Any other key raises ``KeyError`` from the lookup,
        after the status record has been fetched.
    """
    from trove.instance.models import InstanceServiceStatus

    print("Setting status to %s" % new_status)
    status_by_name = dict(
        RUNNING=rd_instance.ServiceStatuses.RUNNING,
        SHUTDOWN=rd_instance.ServiceStatuses.SHUTDOWN,
    )
    record = InstanceServiceStatus.find_by(instance_id=self.id)
    record.status = status_by_name[new_status]
    record.save()
def update_db():
    """Store the guest's status and create a heartbeat for it.

    Reads ``self`` and ``instance_name`` from the enclosing scope (not
    visible in this block).  The status becomes FAILED when
    ``instance_name`` ends with 'GUEST_ERROR' and HEALTHY otherwise.
    """
    record = InstanceServiceStatus.find_by(instance_id=self.id)
    record.status = (srvstatus.ServiceStatuses.FAILED
                     if instance_name.endswith('GUEST_ERROR')
                     else srvstatus.ServiceStatuses.HEALTHY)
    record.save()
    AgentHeartBeat.create(instance_id=self.id)
def update_db():
    """Store the guest's status and create a heartbeat for it.

    Reads ``self`` and ``instance_name`` from the enclosing scope (not
    visible in this block).  The status becomes FAILED when
    ``instance_name`` ends with 'GUEST_ERROR' and RUNNING otherwise.
    """
    record = InstanceServiceStatus.find_by(instance_id=self.id)
    record.status = (rd_instance.ServiceStatuses.FAILED
                     if instance_name.endswith('GUEST_ERROR')
                     else rd_instance.ServiceStatuses.RUNNING)
    record.save()
    AgentHeartBeat.create(instance_id=self.id)
def _set_status(self, new_status='RUNNING'):
    """Store a new service status on this instance's status record.

    :param new_status: key of the status to store, 'RUNNING' or
        'SHUTDOWN'.  Any other key raises ``KeyError`` from the lookup,
        after the status record has been fetched.
    """
    from trove.instance.models import InstanceServiceStatus

    print("Setting status to %s" % new_status)
    status_by_name = dict(
        RUNNING=rd_instance.ServiceStatuses.RUNNING,
        SHUTDOWN=rd_instance.ServiceStatuses.SHUTDOWN,
    )
    record = InstanceServiceStatus.find_by(instance_id=self.id)
    record.status = status_by_name[new_status]
    record.save()
def _set_task_status(self, new_status='RUNNING'):
    """Store a new status on this instance's service status record.

    Despite the method name, the value written is a
    ``rd_instance.ServiceStatuses`` member on the
    ``InstanceServiceStatus`` record.

    :param new_status: key of the status to store, 'RUNNING' or
        'SHUTDOWN'.  Any other key raises ``KeyError`` from the lookup,
        after the status record has been fetched.
    """
    from trove.instance.models import InstanceServiceStatus

    print("Setting status to %s" % new_status)
    status_by_name = dict(
        RUNNING=rd_instance.ServiceStatuses.RUNNING,
        SHUTDOWN=rd_instance.ServiceStatuses.SHUTDOWN,
    )
    record = InstanceServiceStatus.find_by(instance_id=self.id)
    record.status = status_by_name[new_status]
    record.save()
def _set_task_status(self, new_status='HEALTHY'):
    """Store a new status on this instance's service status record.

    Despite the method name, the value written is a
    ``srvstatus.ServiceStatuses`` member on the ``InstanceServiceStatus``
    record.

    :param new_status: key of the status to store, 'HEALTHY' or
        'SHUTDOWN'.  Any other key raises ``KeyError`` from the lookup,
        after the status record has been fetched.
    """
    from trove.instance.models import InstanceServiceStatus

    print("Setting status to %s" % new_status)
    status_by_name = dict(
        HEALTHY=srvstatus.ServiceStatuses.HEALTHY,
        SHUTDOWN=srvstatus.ServiceStatuses.SHUTDOWN,
    )
    record = InstanceServiceStatus.find_by(instance_id=self.id)
    record.status = status_by_name[new_status]
    record.save()
def _instance_ids_with_failures(ids):
    """Return the ids whose service status is a failure state.

    :param ids: iterable of instance ids to check.
    :returns: list of the ids, in input order, whose status equals
        ``ServiceStatuses.FAILED`` or
        ``ServiceStatuses.FAILED_TIMEOUT_GUESTAGENT``.
    """
    # ids is handed to the logger as an argument: formatting it inline
    # with "%s" % ids raises TypeError when ids is a tuple whose length
    # is not exactly one.
    LOG.debug("Checking for service status failures for "
              "instance ids: %s", ids)
    failed_instance_ids = []
    for instance_id in ids:
        status = InstanceServiceStatus.find_by(
            instance_id=instance_id).get_status()
        if (status == ServiceStatuses.FAILED or
                status == ServiceStatuses.FAILED_TIMEOUT_GUESTAGENT):
            failed_instance_ids.append(instance_id)
    return failed_instance_ids
def __call__(self):
    """Build one notification message per non-deleted DB instance.

    :returns: list holding the result of ``transform_instance`` for every
        record yielded by ``DBInstance.find_all(deleted=False)``, in
        iteration order.  Whatever ``InstanceServiceStatus.find_by``
        raises is propagated to the caller.
    """
    audit_start, audit_end = NotificationTransformer._get_audit_period()
    db_infos = instance_models.DBInstance.find_all(deleted=False)

    def _to_message(db_info):
        # Wrap the record and its service status, then transform it.
        status = InstanceServiceStatus.find_by(instance_id=db_info.id)
        mgmt_instance = SimpleMgmtInstance(None, db_info, None, status)
        return self.transform_instance(mgmt_instance,
                                       audit_start, audit_end)

    return [_to_message(db_info) for db_info in db_infos]
def __call__(self):
    """Build one notification message per non-deleted DB instance.

    :returns: list holding the result of ``transform_instance`` for every
        record yielded by ``DBInstance.find_all(deleted=False)``, in
        iteration order.  Whatever ``InstanceServiceStatus.find_by``
        raises is propagated to the caller.
    """
    audit_start, audit_end = NotificationTransformer._get_audit_period()
    db_infos = instance_models.DBInstance.find_all(deleted=False)

    def _to_message(db_info):
        # Wrap the record and its service status, then transform it.
        status = InstanceServiceStatus.find_by(instance_id=db_info.id)
        mgmt_instance = SimpleMgmtInstance(None, db_info, None, status)
        return self.transform_instance(mgmt_instance,
                                       audit_start, audit_end)

    return [_to_message(db_info) for db_info in db_infos]
def _instance_ids_with_failures(ids):
    """Return the ids whose service status is a failure state.

    :param ids: iterable of instance ids to check.
    :returns: list of the ids, in input order, whose status equals
        ``ServiceStatuses.FAILED`` or
        ``ServiceStatuses.FAILED_TIMEOUT_GUESTAGENT``.
    """
    # ids is handed to the logger as an argument: formatting it inline
    # with "%s" % ids raises TypeError when ids is a tuple whose length
    # is not exactly one.
    LOG.debug("Checking for service status failures for "
              "instance ids: %s", ids)
    failed_instance_ids = []
    for instance_id in ids:
        status = InstanceServiceStatus.find_by(
            instance_id=instance_id).get_status()
        if (status == ServiceStatuses.FAILED or
                status == ServiceStatuses.FAILED_TIMEOUT_GUESTAGENT):
            failed_instance_ids.append(instance_id)
    return failed_instance_ids
def __call__(self):
    """Build notification messages for the non-deleted DB instances.

    Instances for which no ``InstanceServiceStatus`` exists are skipped
    with a debug message.

    :returns: list of ``transform_instance`` results, one per instance
        that has a service status, in iteration order.
    """
    audit_start, audit_end = NotificationTransformer._get_audit_period()
    collected = []
    for db_info in instance_models.DBInstance.find_all(deleted=False):
        try:
            status = InstanceServiceStatus.find_by(instance_id=db_info.id)
        except exception.ModelNotFoundError:
            # For a short time after creation the db resource of an
            # instance can exist without an InstanceServiceStatus.  No
            # notification is sent for such an instance on this pass; it
            # is too new and is picked up by the next round.
            LOG.debug("InstanceServiceStatus not found for %s. "
                      "Will wait to send notification." % db_info.id)
        else:
            mgmt_instance = SimpleMgmtInstance(None, db_info, None, status)
            collected.append(self.transform_instance(mgmt_instance,
                                                     audit_start,
                                                     audit_end))
    return collected
def _all_status_ready(ids):
    """Tell whether polling the given instances can stop.

    Instances are examined in order and the first decisive one wins:

    * a status of FAILED or FAILED_TIMEOUT_GUESTAGENT returns True,
      because there is no need to continue polling after a failure;
    * a status that is neither RUNNING nor BUILD_PENDING returns False,
      so polling continues.

    :param ids: iterable of instance ids to check.
    :returns: True when polling should stop (a failure was seen, or every
        instance is RUNNING or BUILD_PENDING, which includes an empty
        ``ids``); False otherwise.
    """
    # Values are passed as logging arguments: "%s" % ids raises TypeError
    # when ids is a tuple whose length is not exactly one.
    LOG.debug("Checking service status of instance ids: %s", ids)
    for instance_id in ids:
        status = InstanceServiceStatus.find_by(
            instance_id=instance_id).get_status()
        if (status == ServiceStatuses.FAILED or
                status == ServiceStatuses.FAILED_TIMEOUT_GUESTAGENT):
            # if one has failed, no need to continue polling
            LOG.debug("Instance %s in %s, exiting polling.",
                      instance_id, status)
            return True
        if (status != ServiceStatuses.RUNNING and
                status != ServiceStatuses.BUILD_PENDING):
            # if one is not in a ready state, continue polling
            LOG.debug("Instance %s in %s, continue polling.",
                      instance_id, status)
            return False
    LOG.debug("Instances are ready, exiting polling for: %s", ids)
    return True
def _all_status_ready(ids):
    """Tell whether polling the given instances can stop.

    Instances are examined in order and the first decisive one wins:

    * a status of FAILED or FAILED_TIMEOUT_GUESTAGENT returns True,
      because there is no need to continue polling after a failure;
    * a status that is neither RUNNING nor BUILD_PENDING returns False,
      so polling continues.

    :param ids: iterable of instance ids to check.
    :returns: True when polling should stop (a failure was seen, or every
        instance is RUNNING or BUILD_PENDING, which includes an empty
        ``ids``); False otherwise.
    """
    # Values are passed as logging arguments: "%s" % ids raises TypeError
    # when ids is a tuple whose length is not exactly one.
    LOG.debug("Checking service status of instance ids: %s", ids)
    for instance_id in ids:
        status = InstanceServiceStatus.find_by(
            instance_id=instance_id).get_status()
        if (status == ServiceStatuses.FAILED or
                status == ServiceStatuses.FAILED_TIMEOUT_GUESTAGENT):
            # if one has failed, no need to continue polling
            LOG.debug("Instance %s in %s, exiting polling.",
                      instance_id, status)
            return True
        if (status != ServiceStatuses.RUNNING and
                status != ServiceStatuses.BUILD_PENDING):
            # if one is not in a ready state, continue polling
            LOG.debug("Instance %s in %s, continue polling.",
                      instance_id, status)
            return False
    LOG.debug("Instances are ready, exiting polling for: %s", ids)
    return True
def __init__(self, host_info):
    """Copy the host's usage figures and annotate its instance entries.

    Each entry of ``host_info.instances`` is modified in place: its
    "uuid" key is renamed to "server_id", and "id", "tenant_id" and
    "status" are filled in from the matching ``DBInstance`` and
    ``InstanceServiceStatus``.  When either lookup raises a
    ``TroveError`` the error is logged and "id" is set to ``None``.

    :param host_info: object providing ``name``, ``percentUsed``,
        ``totalRAM``, ``usedRAM`` and ``instances``.
    """
    self.name = host_info.name
    self.percent_used = host_info.percentUsed
    self.total_ram = host_info.totalRAM
    self.used_ram = host_info.usedRAM
    self.instances = host_info.instances

    for entry in self.instances:
        # Rename the "uuid" key to "server_id".
        server_id = entry["uuid"]
        entry["server_id"] = server_id
        del entry["uuid"]
        try:
            db_info = DBInstance.find_by(compute_instance_id=server_id)
            entry["id"] = db_info.id
            entry["tenant_id"] = db_info.tenant_id
            service_status = InstanceServiceStatus.find_by(
                instance_id=db_info.id)
            entry["status"] = SimpleInstance(None, db_info,
                                             service_status).status
        except exception.TroveError as err:
            LOG.error(err)
            LOG.error("Compute Instance ID found with no associated RD "
                      "instance: %s" % server_id)
            entry["id"] = None
def __call__(self):
    """Build notification messages for the non-deleted DB instances.

    Instances for which no ``InstanceServiceStatus`` exists are skipped
    with a debug message.

    :returns: list of ``transform_instance`` results, one per instance
        that has a service status, in iteration order.
    """
    audit_start, audit_end = NotificationTransformer._get_audit_period()
    collected = []
    for db_info in instance_models.DBInstance.find_all(deleted=False):
        try:
            status = InstanceServiceStatus.find_by(instance_id=db_info.id)
        except exception.ModelNotFoundError:
            # For a short time after creation the db resource of an
            # instance can exist without an InstanceServiceStatus.  No
            # notification is sent for such an instance on this pass; it
            # is too new and is picked up by the next round.
            LOG.debug("InstanceServiceStatus not found for %s. "
                      "Will wait to send notification." % db_info.id)
        else:
            mgmt_instance = SimpleMgmtInstance(None, db_info, None, status)
            collected.append(self.transform_instance(mgmt_instance,
                                                     audit_start,
                                                     audit_end))
    return collected
def __init__(self, host_info):
    """Copy the host's usage figures and annotate its instance entries.

    Each entry of ``host_info.instances`` is modified in place: its
    'uuid' key is renamed to 'server_id', and 'id', 'tenant_id' and
    'status' are filled in from the matching ``DBInstance`` and
    ``InstanceServiceStatus``.  When either lookup raises a
    ``TroveError`` the error is logged and 'id' is set to ``None``.

    :param host_info: object providing ``name``, ``percentUsed``,
        ``totalRAM``, ``usedRAM`` and ``instances``.
    """
    self.name = host_info.name
    self.percent_used = host_info.percentUsed
    self.total_ram = host_info.totalRAM
    self.used_ram = host_info.usedRAM
    self.instances = host_info.instances

    for entry in self.instances:
        # Rename the 'uuid' key to 'server_id'.
        server_id = entry['uuid']
        entry['server_id'] = server_id
        del entry['uuid']
        try:
            db_info = DBInstance.find_by(compute_instance_id=server_id)
            entry['id'] = db_info.id
            entry['tenant_id'] = db_info.tenant_id
            service_status = InstanceServiceStatus.find_by(
                instance_id=db_info.id)
            entry['status'] = SimpleInstance(None, db_info,
                                             service_status).status
        except exception.TroveError as err:
            LOG.error(err)
            LOG.error("Compute Instance ID found with no associated RD "
                      "instance: %s" % server_id)
            entry['id'] = None
def _set_service_status_to_paused(self):
    """Set this instance's service status to PAUSED and save it."""
    service = InstanceServiceStatus.find_by(instance_id=self.id)
    paused = rd_instance.ServiceStatuses.PAUSED
    service.set_status(paused)
    service.save()
def _refresh_compute_service_status(self):
    """Reload ``self.service_status`` from the stored service status.

    Looks up the ``InstanceServiceStatus`` record for ``self.id`` and
    keeps the result of its ``get_status()`` on the instance.
    """
    self.service_status = InstanceServiceStatus.find_by(
        instance_id=self.id).get_status()
def tearDown(self):
    """Undo the ``dbaas`` patches and remove the fake status record.

    Runs the base-class ``tearDown`` first, then reassigns
    ``dbaas.utils.execute_with_timeout`` and ``dbaas.time.sleep`` from the
    ``orig_*`` values held on ``self`` (presumably saved by ``setUp`` --
    not visible here) and deletes the ``InstanceServiceStatus`` record
    found for ``FAKE_ID``.
    """
    super(MySqlAppTest, self).tearDown()

    # Put back the callables that were replaced for the test.
    dbaas.utils.execute_with_timeout = self.orig_utils_execute_with_timeout
    dbaas.time.sleep = self.orig_time_sleep

    # Drop the status record that belongs to the fake instance.
    fake_status = InstanceServiceStatus.find_by(instance_id=self.FAKE_ID)
    fake_status.delete()
def assert_reported_status(self, expected_status):
    """Assert that the stored status for ``FAKE_ID`` is the expected one.

    :param expected_status: value the ``status`` attribute of the
        ``InstanceServiceStatus`` record must equal.
    """
    record = InstanceServiceStatus.find_by(instance_id=self.FAKE_ID)
    reported = record.status
    self.assertEqual(expected_status, reported)
def _load_status(self):
    """Return the ``InstanceServiceStatus`` record found for ``self.id``."""
    status_record = InstanceServiceStatus.find_by(instance_id=self.id)
    return status_record
def tearDown(self):
    """Undo the ``dbaas`` patches and remove the fake status record.

    Runs the base-class ``tearDown`` first, then reassigns
    ``dbaas.time.sleep`` from ``self.orig_dbaas_time_sleep`` (presumably
    saved by ``setUp`` -- not visible here), deletes the
    ``InstanceServiceStatus`` record found for ``FAKE_ID`` and resets
    ``dbaas.CONF.guest_id`` to ``None``.
    """
    super(BaseDbStatusTest, self).tearDown()

    dbaas.time.sleep = self.orig_dbaas_time_sleep

    # Drop the status record that belongs to the fake instance.
    fake_status = InstanceServiceStatus.find_by(instance_id=self.FAKE_ID)
    fake_status.delete()

    dbaas.CONF.guest_id = None
def tearDown(self):
    """Undo the test's patches and remove the fake status record.

    Runs the base-class ``tearDown`` first, then reassigns
    ``dbaas.utils.execute_with_timeout`` and ``time.sleep`` from the
    ``orig_*`` values held on ``self`` (presumably saved by ``setUp`` --
    not visible here) and deletes the ``InstanceServiceStatus`` record
    found for ``FAKE_ID``.
    """
    super(MySqlAppTest, self).tearDown()

    # Put back the callables that were replaced for the test.
    dbaas.utils.execute_with_timeout = self.orig_utils_execute_with_timeout
    time.sleep = self.orig_time_sleep

    # Drop the status record that belongs to the fake instance.
    fake_status = InstanceServiceStatus.find_by(instance_id=self.FAKE_ID)
    fake_status.delete()
def _set_service_status_to_paused(self):
    """Set this instance's service status to PAUSED and save it."""
    service = InstanceServiceStatus.find_by(instance_id=self.id)
    paused = inst_models.ServiceStatuses.PAUSED
    service.set_status(paused)
    service.save()
def set_server_running():
    """Store RUNNING as the service status of the instance behind ``id``.

    NOTE(review): ``id`` is not defined in this block; presumably it is a
    variable of the enclosing scope holding the compute instance id
    rather than the builtin -- confirm against the caller.
    """
    db_instance = DBInstance.find_by(compute_instance_id=id)
    LOG.debug("Setting server %s to running" % db_instance.id)
    service = InstanceServiceStatus.find_by(instance_id=db_instance.id)
    service.status = rd_instance.ServiceStatuses.RUNNING
    service.save()
def _refresh_compute_service_status(self):
    """Reload ``self.service_status`` from the stored service status.

    Looks up the ``InstanceServiceStatus`` record for ``self.id`` and
    keeps the result of its ``get_status()`` on the instance.
    """
    self.service_status = InstanceServiceStatus.find_by(
        instance_id=self.id).get_status()
def assert_reported_status(self, expected_status):
    """Assert that the stored status for ``FAKE_ID`` is the expected one.

    :param expected_status: value the ``status`` attribute of the
        ``InstanceServiceStatus`` record must equal.
    """
    record = InstanceServiceStatus.find_by(instance_id=self.FAKE_ID)
    reported = record.status
    self.assertEqual(expected_status, reported)
def _load_status(self):
    """Return the ``InstanceServiceStatus`` record found for ``self.id``."""
    status_record = InstanceServiceStatus.find_by(instance_id=self.id)
    return status_record
def set_server_running():
    """Store RUNNING as the service status of the instance behind ``id``.

    NOTE(review): ``id`` is not defined in this block; presumably it is a
    variable of the enclosing scope holding the compute instance id
    rather than the builtin -- confirm against the caller.
    """
    db_instance = DBInstance.find_by(compute_instance_id=id)
    LOG.debug("Setting server %s to running" % db_instance.id)
    service = InstanceServiceStatus.find_by(instance_id=db_instance.id)
    service.status = rd_instance.ServiceStatuses.RUNNING
    service.save()
def tearDown(self):
    """Undo the ``dbaas`` patches and remove the fake status record.

    Runs the base-class ``tearDown`` first, then reassigns
    ``dbaas.time.sleep`` from ``self.orig_dbaas_time_sleep`` (presumably
    saved by ``setUp`` -- not visible here), deletes the
    ``InstanceServiceStatus`` record found for ``FAKE_ID`` and resets
    ``dbaas.CONF.guest_id`` to ``None``.
    """
    super(BaseDbStatusTest, self).tearDown()

    dbaas.time.sleep = self.orig_dbaas_time_sleep

    # Drop the status record that belongs to the fake instance.
    fake_status = InstanceServiceStatus.find_by(instance_id=self.FAKE_ID)
    fake_status.delete()

    dbaas.CONF.guest_id = None