def add_failed_host(self, notification_id, notification_hostname,
                    notification_cluster_port, retry_mode):
    """Node recover start thread.

    Fetches the instances running on the failed node and starts one
    ``rc_worker.recovery_instance`` thread per instance, throttled by a
    semaphore whose multiplicity comes from the ``recover_starter`` config.

    :param notification_id: The notification ID included in the notification
    :param notification_hostname: The host name of the failure node that is
        included in the notification
    :param notification_cluster_port: Cluster port included in the
        notification; passed to the reserve-list lookup when no reserve
        entry is found for the notification's current ``recover_to`` host
    :param retry_mode: When ``False`` a reserve node is selected and marked
        as deleted before recovery; when ``True`` that step is skipped and
        no recovery thread is started for newly registered VMs
    :returns: None. Every exception is logged and swallowed.
    """
    try:
        self.rc_config.set_request_context()
        db_engine = dbapi.get_engine(self.rc_config)
        session = dbapi.get_session(db_engine)

        conf_dict = self.rc_config.get_value('recover_starter')
        recovery_max_retry_cnt = conf_dict.get('recovery_max_retry_cnt')
        recovery_retry_interval = conf_dict.get('recovery_retry_interval')

        vm_list = self.rc_util_api.fetch_servers_on_hypervisor(
            notification_hostname)

        # Nothing to recover: mark the notification as finished and stop.
        if len(vm_list) == 0:
            msg = "There is no instance in " + notification_hostname + "."
            LOG.info(msg)
            # update record in notification_list
            self.rc_util_db.update_notification_list_db(
                session, 'progress', 2, notification_id)
            return

        msg = "Do get_all_notification_list_by_id_for_update."
        LOG.info(msg)
        result = dbapi.get_all_notification_list_by_id_for_update(
            session, notification_id)
        msg = "Succeeded in " \
            + "get_all_notification_list_by_id_for_update. " \
            + "Return_value = " + str(result)
        LOG.info(msg)
        recover_to = result.pop().recover_to

        if retry_mode is False:
            msg = "Do get_all_reserve_list_by_hostname_not_deleted."
            LOG.info(msg)
            cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
                session, recover_to)
            msg = "Succeeded in " \
                + "get_all_reserve_list_by_hostname_not_deleted. " \
                + "Return_value = " + str(cnt)
            LOG.info(msg)

            if not cnt:
                # The recorded recover_to host is no longer available as a
                # reserve; look one up by cluster port instead.
                msg = "Do " \
                    + "get_one_reserve_list_by_cluster_port_for_update."
                LOG.info(msg)
                cnt = dbapi.\
                    get_one_reserve_list_by_cluster_port_for_update(
                        session,
                        notification_cluster_port,
                        notification_hostname
                    )
                msg = "Succeeded in " \
                    + "get_one_reserve_list_by_cluster_port_for_update. " \
                    + "Return_value = " + str(cnt)
                LOG.info(msg)

                if not cnt:
                    msg = "The reserve node not exist in " \
                          "reserve_list DB, " \
                          "so do not recover instances."
                    LOG.warning(msg)
                    # session goes first, exactly as in the other
                    # update_notification_list_db calls of this method;
                    # it was missing here, which shifted every argument.
                    self.rc_util_db.update_notification_list_db(
                        session, 'progress', 3, notification_id)
                    return

                result = cnt.pop()
                recover_to = result.hostname
                update_at = datetime.datetime.now()
                msg = "Do " \
                    + "update_notification_list_by_notification_id_recover_to."
                LOG.info(msg)
                dbapi.update_notification_list_by_notification_id_recover_to(
                    session, notification_id, update_at, recover_to)
                msg = "Succeeded in " \
                    + "update_notification_list_by_notification_id_recover_to."
                LOG.info(msg)

            delete_at = datetime.datetime.now()
            msg = "Do update_reserve_list_by_hostname_as_deleted."
            LOG.info(msg)
            dbapi.update_reserve_list_by_hostname_as_deleted(
                session, recover_to, delete_at)
            msg = "Succeeded in " \
                + "update_reserve_list_by_hostname_as_deleted."
            LOG.info(msg)

        # create semaphore (Multiplicity is get from config.)
        conf_dict = self.rc_config.get_value('recover_starter')
        sem_recovery_instance = threading.Semaphore(
            int(conf_dict.get('semaphore_multiplicity')))

        # Try to register every VM; VMs whose registration returned a falsy
        # primary_id are retried up to recovery_max_retry_cnt times.
        incomplete_list = []
        for _ in range(0, int(recovery_max_retry_cnt)):
            incomplete_list = []

            for vm_uuid in vm_list:
                primary_id = self._create_vm_list_db_for_failed_host(
                    session, notification_id, vm_uuid)

                if primary_id:
                    if retry_mode is True:
                        # Skip recovery_instance thread. Will delegate to
                        # ...
                        msg = "RETRY MODE. Skip recovery_instance thread" \
                            + " vm_uuide=" + vm_uuid \
                            + " notification_id=" + notification_id
                        LOG.info(msg)
                    else:
                        msg = "Run thread rc_worker.recovery_instance." \
                            + " vm_uuid=" + vm_uuid \
                            + " primary_id=" + str(primary_id)
                        LOG.info(msg)
                        thread_name = self.rc_util.make_thread_name(
                            VM_LIST, primary_id)
                        threading.Thread(
                            target=self.rc_worker.recovery_instance,
                            name=thread_name,
                            args=(vm_uuid, primary_id,
                                  sem_recovery_instance)).start()
                elif retry_mode is True:
                    continue
                else:
                    incomplete_list.append(vm_uuid)

            if incomplete_list:
                vm_list = incomplete_list
                greenthread.sleep(int(recovery_retry_interval))
            else:
                break

        # VMs still unregistered after all retries are inserted directly
        # and their recovery thread is started.
        for vm_uuid in incomplete_list:
            primary_id = self.rc_util_db.insert_vm_list_db(
                session, notification_id, vm_uuid, 0)

            msg = "Run thread rc_worker.recovery_instance." \
                + " vm_uuid=" + vm_uuid \
                + " primary_id=" + str(primary_id)
            LOG.info(msg)
            thread_name = self.rc_util.make_thread_name(
                VM_LIST, primary_id)
            threading.Thread(
                target=self.rc_worker.recovery_instance,
                name=thread_name,
                args=(vm_uuid, primary_id, sem_recovery_instance)).start()

        # update record in notification_list
        self.rc_util_db.update_notification_list_db(
            session, 'progress', 2, notification_id)
        return

    except:
        # Deliberate catch-all: the original KeyError handler and the bare
        # handler were byte-identical, so they are merged. The error is
        # logged with its traceback and never propagated to the caller.
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        LOG.error(error_type)
        LOG.error(error_value)
        for tb in tb_list:
            LOG.error(tb)
        return
def add_failed_host(self, notification_id, notification_hostname, notification_cluster_port, retry_mode):
    """Node recover start thread.

    Fetches the instances running on the failed node and starts one
    ``rc_worker.recovery_instance`` thread per instance, throttled by a
    semaphore whose multiplicity comes from the ``recover_starter`` config.

    :param notification_id: The notification ID included in the notification
    :param notification_hostname: The host name of the failure node that is
        included in the notification
    :param notification_cluster_port: Cluster port included in the
        notification; passed to the reserve-list lookup when no reserve
        entry is found for the notification's current ``recover_to`` host
    :param retry_mode: When ``False`` a reserve node is selected and marked
        as deleted before recovery; when true that step is skipped and no
        recovery thread is started for newly registered VMs
    :returns: None. Every exception is written to syslog and swallowed.
    """
    try:
        db_engine = dbapi.get_engine()
        session = dbapi.get_session(db_engine)

        conf_dict = self.rc_config.get_value("recover_starter")
        recovery_max_retry_cnt = conf_dict.get("recovery_max_retry_cnt")
        recovery_retry_interval = conf_dict.get("recovery_retry_interval")

        vm_list = self.rc_util_api.fetch_servers_on_hypervisor(notification_hostname)

        # Nothing to recover: mark the notification as finished and stop.
        if len(vm_list) == 0:
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0014", syslog.LOG_INFO)
            msg = "There is no instance in " + notification_hostname + "."
            self.rc_util.syslogout(msg, syslog.LOG_INFO)
            # update record in notification_list
            self.rc_util_db.update_notification_list_db(session, "progress", 2, notification_id)
            return

        result = dbapi.get_all_notification_list_by_id_for_update(session, notification_id)
        recover_to = result.pop().recover_to

        if retry_mode is False:
            cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(session, recover_to)
            if not cnt:
                # The recorded recover_to host is no longer available as a
                # reserve; look one up by cluster port instead.
                cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                    session, notification_cluster_port, notification_hostname
                )
                if not cnt:
                    self.rc_util.syslogout_ex("RecoveryControllerStarter_0022", syslog.LOG_WARNING)
                    msg = "The reserve node not exist in " "reserve_list DB, " "so do not recover instances."
                    self.rc_util.syslogout(msg, syslog.LOG_WARNING)
                    # session goes first, exactly as in the other
                    # update_notification_list_db calls of this method;
                    # it was missing here, which shifted every argument.
                    self.rc_util_db.update_notification_list_db(session, "progress", 3, notification_id)
                    return

                result = cnt.pop()
                recover_to = result.hostname
                update_at = datetime.datetime.now()
                dbapi.update_notification_list_by_notification_id_recover_to(
                    session, notification_id, update_at, recover_to
                )
                self.rc_util.syslogout_ex("RecoveryControllerStarter_0024", syslog.LOG_INFO)

            self.rc_util.syslogout_ex("RecoveryControllerStarter_0015", syslog.LOG_INFO)
            delete_at = datetime.datetime.now()
            dbapi.update_reserve_list_by_hostname_as_deleted(session, recover_to, delete_at)

        # create semaphore (Multiplicity is get from config.)
        conf_dict = self.rc_config.get_value("recover_starter")
        sem_recovery_instance = threading.Semaphore(int(conf_dict.get("semaphore_multiplicity")))

        # Try to register every VM; VMs whose registration returned a falsy
        # primary_id are retried up to recovery_max_retry_cnt times.
        incomplete_list = []
        for _ in range(0, int(recovery_max_retry_cnt)):
            incomplete_list = []

            for vm_uuid in vm_list:
                primary_id = self._create_vm_list_db_for_failed_host(session, notification_id, vm_uuid)

                if primary_id:
                    # `== True` is kept on purpose so truthy non-bool values
                    # such as 1 behave exactly as before.
                    if retry_mode == True:
                        # Skip recovery_instance thread. Will delegate to
                        # ...
                        msg = (
                            "RETRY MODE. Skip recovery_instance thread"
                            + " vm_uuide="
                            + vm_uuid
                            + " notification_id="
                            + notification_id
                        )
                        self.rc_util.syslogout(msg, syslog.LOG_INFO)
                    else:
                        msg = (
                            "Run thread rc_worker.recovery_instance."
                            + " vm_uuid="
                            + vm_uuid
                            + " primary_id="
                            + str(primary_id)
                        )
                        self.rc_util.syslogout(msg, syslog.LOG_INFO)
                        threading.Thread(
                            target=self.rc_worker.recovery_instance,
                            args=(vm_uuid, primary_id, sem_recovery_instance),
                        ).start()
                elif retry_mode == True:
                    continue
                else:
                    incomplete_list.append(vm_uuid)

            if incomplete_list:
                vm_list = incomplete_list
                greenthread.sleep(int(recovery_retry_interval))
            else:
                break

        # VMs still unregistered after all retries are inserted directly
        # and their recovery thread is started.
        for vm_uuid in incomplete_list:
            primary_id = self.rc_util_db.insert_vm_list_db(session, notification_id, vm_uuid, 0)

            self.rc_util.syslogout_ex("RecoveryControllerStarter_0031", syslog.LOG_INFO)
            msg = (
                "Run thread rc_worker.recovery_instance."
                + " vm_uuid="
                + vm_uuid
                + " primary_id="
                + str(primary_id)
            )
            self.rc_util.syslogout(msg, syslog.LOG_INFO)
            threading.Thread(
                target=self.rc_worker.recovery_instance, args=(vm_uuid, primary_id, sem_recovery_instance)
            ).start()

        # update record in notification_list
        self.rc_util_db.update_notification_list_db(session, "progress", 2, notification_id)
        return

    except KeyError:
        # Reported under its own message ID (0017), separate from the
        # catch-all below.
        self.rc_util.syslogout_ex("RecoveryControllerStarter_0017", syslog.LOG_ERR)
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in tb_list:
            self.rc_util.syslogout(tb, syslog.LOG_ERR)
        return
    except:
        # Deliberate catch-all: the error is written to syslog with its
        # traceback and never propagated to the caller.
        self.rc_util.syslogout_ex("RecoveryControllerStarter_0018", syslog.LOG_ERR)
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in tb_list:
            self.rc_util.syslogout(tb, syslog.LOG_ERR)
        return
def add_failed_host(self, notification_id, notification_hostname,
                    notification_cluster_port, retry_mode):
    """Node recover start thread.

    Fetches the instances running on the failed node and starts one
    ``rc_worker.recovery_instance`` thread per instance, throttled by a
    semaphore whose multiplicity comes from the ``recover_starter`` config.

    :param notification_id: The notification ID included in the notification
    :param notification_hostname: The host name of the failure node that is
        included in the notification
    :param notification_cluster_port: Cluster port included in the
        notification; passed to the reserve-list lookup when no reserve
        entry is found for the notification's current ``recover_to`` host
    :param retry_mode: When ``False`` a reserve node is selected and marked
        as deleted before recovery; when true that step is skipped and no
        recovery thread is started for newly registered VMs
    :returns: None. Every exception is written to syslog and swallowed.
    """
    try:
        db_engine = dbapi.get_engine()
        session = dbapi.get_session(db_engine)

        conf_dict = self.rc_config.get_value('recover_starter')
        recovery_max_retry_cnt = conf_dict.get('recovery_max_retry_cnt')
        recovery_retry_interval = conf_dict.get('recovery_retry_interval')

        vm_list = self.rc_util_api.fetch_servers_on_hypervisor(
            notification_hostname)

        # Nothing to recover: mark the notification as finished and stop.
        if len(vm_list) == 0:
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0014",
                                      syslog.LOG_INFO)
            msg = "There is no instance in " + notification_hostname + "."
            self.rc_util.syslogout(msg, syslog.LOG_INFO)
            # update record in notification_list
            self.rc_util_db.update_notification_list_db(
                session, 'progress', 2, notification_id)
            return

        result = dbapi.get_all_notification_list_by_id_for_update(
            session, notification_id)
        recover_to = result.pop().recover_to

        if retry_mode is False:
            cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
                session, recover_to)
            if not cnt:
                # The recorded recover_to host is no longer available as a
                # reserve; look one up by cluster port instead.
                cnt = dbapi.\
                    get_one_reserve_list_by_cluster_port_for_update(
                        session,
                        notification_cluster_port,
                        notification_hostname
                    )
                if not cnt:
                    self.rc_util.syslogout_ex(
                        "RecoveryControllerStarter_0022",
                        syslog.LOG_WARNING)
                    msg = "The reserve node not exist in " \
                          "reserve_list DB, " \
                          "so do not recover instances."
                    self.rc_util.syslogout(msg, syslog.LOG_WARNING)
                    # session goes first, exactly as in the other
                    # update_notification_list_db calls of this method;
                    # it was missing here, which shifted every argument.
                    self.rc_util_db.update_notification_list_db(
                        session, 'progress', 3, notification_id)
                    return

                result = cnt.pop()
                recover_to = result.hostname
                update_at = datetime.datetime.now()
                dbapi.update_notification_list_by_notification_id_recover_to(
                    session, notification_id, update_at, recover_to)
                self.rc_util.syslogout_ex(
                    "RecoveryControllerStarter_0024", syslog.LOG_INFO)

            self.rc_util.syslogout_ex("RecoveryControllerStarter_0015",
                                      syslog.LOG_INFO)
            delete_at = datetime.datetime.now()
            dbapi.update_reserve_list_by_hostname_as_deleted(
                session, recover_to, delete_at)

        # create semaphore (Multiplicity is get from config.)
        conf_dict = self.rc_config.get_value('recover_starter')
        sem_recovery_instance = threading.Semaphore(
            int(conf_dict.get('semaphore_multiplicity')))

        # Try to register every VM; VMs whose registration returned a falsy
        # primary_id are retried up to recovery_max_retry_cnt times.
        incomplete_list = []
        for _ in range(0, int(recovery_max_retry_cnt)):
            incomplete_list = []

            for vm_uuid in vm_list:
                primary_id = self._create_vm_list_db_for_failed_host(
                    session, notification_id, vm_uuid)

                if primary_id:
                    # `== True` is kept on purpose so truthy non-bool values
                    # such as 1 behave exactly as before.
                    if retry_mode == True:
                        # Skip recovery_instance thread. Will delegate to
                        # ...
                        msg = "RETRY MODE. Skip recovery_instance thread" \
                            + " vm_uuide=" + vm_uuid \
                            + " notification_id=" + notification_id
                        self.rc_util.syslogout(msg, syslog.LOG_INFO)
                    else:
                        msg = "Run thread rc_worker.recovery_instance." \
                            + " vm_uuid=" + vm_uuid \
                            + " primary_id=" + str(primary_id)
                        self.rc_util.syslogout(msg, syslog.LOG_INFO)
                        threading.Thread(
                            target=self.rc_worker.recovery_instance,
                            args=(vm_uuid, primary_id,
                                  sem_recovery_instance)).start()
                elif retry_mode == True:
                    continue
                else:
                    incomplete_list.append(vm_uuid)

            if incomplete_list:
                vm_list = incomplete_list
                greenthread.sleep(int(recovery_retry_interval))
            else:
                break

        # VMs still unregistered after all retries are inserted directly
        # and their recovery thread is started.
        for vm_uuid in incomplete_list:
            primary_id = self.rc_util_db.insert_vm_list_db(
                session, notification_id, vm_uuid, 0)

            self.rc_util.syslogout_ex("RecoveryControllerStarter_0031",
                                      syslog.LOG_INFO)
            msg = "Run thread rc_worker.recovery_instance." \
                + " vm_uuid=" + vm_uuid \
                + " primary_id=" + str(primary_id)
            self.rc_util.syslogout(msg, syslog.LOG_INFO)
            threading.Thread(target=self.rc_worker.recovery_instance,
                             args=(vm_uuid, primary_id,
                                   sem_recovery_instance)).start()

        # update record in notification_list
        self.rc_util_db.update_notification_list_db(
            session, 'progress', 2, notification_id)
        return

    except KeyError:
        # Reported under its own message ID (0017), separate from the
        # catch-all below.
        self.rc_util.syslogout_ex("RecoveryControllerStarter_0017",
                                  syslog.LOG_ERR)
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in tb_list:
            self.rc_util.syslogout(tb, syslog.LOG_ERR)
        return
    except:
        # Deliberate catch-all: the error is written to syslog with its
        # traceback and never propagated to the caller.
        self.rc_util.syslogout_ex("RecoveryControllerStarter_0018",
                                  syslog.LOG_ERR)
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in tb_list:
            self.rc_util.syslogout(tb, syslog.LOG_ERR)
        return
def add_failed_host(self, notification_id, notification_hostname,
                    notification_cluster_port, retry_mode):
    """Node recover start thread.

    Fetches the instances running on the failed node and starts one
    ``rc_worker.recovery_instance`` thread per instance, throttled by a
    semaphore whose multiplicity comes from the ``recover_starter`` config.

    :param notification_id: The notification ID included in the notification
    :param notification_hostname: The host name of the failure node that is
        included in the notification
    :param notification_cluster_port: Cluster port included in the
        notification; passed to the reserve-list lookup when no reserve
        entry is found for the notification's current ``recover_to`` host
    :param retry_mode: When ``False`` a reserve node is selected and marked
        as deleted before recovery; when ``True`` that step is skipped and
        no recovery thread is started for newly registered VMs
    :returns: None. Every exception is logged and swallowed.
    """
    try:
        self.rc_config.set_request_context()
        db_engine = dbapi.get_engine(self.rc_config)
        session = dbapi.get_session(db_engine)

        conf_dict = self.rc_config.get_value('recover_starter')
        recovery_max_retry_cnt = conf_dict.get('recovery_max_retry_cnt')
        recovery_retry_interval = conf_dict.get('recovery_retry_interval')

        vm_list = self.rc_util_api.fetch_servers_on_hypervisor(
            notification_hostname)

        # Nothing to recover: mark the notification as finished and stop.
        if len(vm_list) == 0:
            msg = "There is no instance in " + notification_hostname + "."
            LOG.info(msg)
            # update record in notification_list
            self.rc_util_db.update_notification_list_db(
                session, 'progress', 2, notification_id)
            return

        msg = "Do get_all_notification_list_by_id_for_update."
        LOG.info(msg)
        result = dbapi.get_all_notification_list_by_id_for_update(
            session, notification_id)
        msg = "Succeeded in " \
            + "get_all_notification_list_by_id_for_update. " \
            + "Return_value = " + str(result)
        LOG.info(msg)
        recover_to = result.pop().recover_to

        if retry_mode is False:
            msg = "Do get_all_reserve_list_by_hostname_not_deleted."
            LOG.info(msg)
            cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
                session, recover_to)
            msg = "Succeeded in " \
                + "get_all_reserve_list_by_hostname_not_deleted. " \
                + "Return_value = " + str(cnt)
            LOG.info(msg)

            if not cnt:
                # The recorded recover_to host is no longer available as a
                # reserve; look one up by cluster port instead.
                msg = "Do " \
                    + "get_one_reserve_list_by_cluster_port_for_update."
                LOG.info(msg)
                cnt = dbapi.\
                    get_one_reserve_list_by_cluster_port_for_update(
                        session,
                        notification_cluster_port,
                        notification_hostname
                    )
                msg = "Succeeded in " \
                    + "get_one_reserve_list_by_cluster_port_for_update. " \
                    + "Return_value = " + str(cnt)
                LOG.info(msg)

                if not cnt:
                    msg = "The reserve node not exist in " \
                          "reserve_list DB, " \
                          "so do not recover instances."
                    LOG.warning(msg)
                    # session goes first, exactly as in the other
                    # update_notification_list_db calls of this method;
                    # it was missing here, which shifted every argument.
                    self.rc_util_db.update_notification_list_db(
                        session, 'progress', 3, notification_id)
                    return

                result = cnt.pop()
                recover_to = result.hostname
                update_at = datetime.datetime.now()
                msg = "Do " \
                    + "update_notification_list_by_notification_id_recover_to."
                LOG.info(msg)
                dbapi.update_notification_list_by_notification_id_recover_to(
                    session,
                    notification_id,
                    update_at,
                    recover_to
                )
                msg = "Succeeded in " \
                    + "update_notification_list_by_notification_id_recover_to."
                LOG.info(msg)

            delete_at = datetime.datetime.now()
            msg = "Do update_reserve_list_by_hostname_as_deleted."
            LOG.info(msg)
            dbapi.update_reserve_list_by_hostname_as_deleted(
                session, recover_to, delete_at)
            msg = "Succeeded in " \
                + "update_reserve_list_by_hostname_as_deleted."
            LOG.info(msg)

        # create semaphore (Multiplicity is get from config.)
        conf_dict = self.rc_config.get_value('recover_starter')
        sem_recovery_instance = threading.Semaphore(
            int(conf_dict.get('semaphore_multiplicity')))

        # Try to register every VM; VMs whose registration returned a falsy
        # primary_id are retried up to recovery_max_retry_cnt times.
        incomplete_list = []
        for _ in range(0, int(recovery_max_retry_cnt)):
            incomplete_list = []

            for vm_uuid in vm_list:
                primary_id = self._create_vm_list_db_for_failed_host(
                    session, notification_id, vm_uuid)

                if primary_id:
                    if retry_mode is True:
                        # Skip recovery_instance thread. Will delegate to
                        # ...
                        msg = "RETRY MODE. Skip recovery_instance thread" \
                            + " vm_uuide=" + vm_uuid \
                            + " notification_id=" + notification_id
                        LOG.info(msg)
                    else:
                        msg = "Run thread rc_worker.recovery_instance." \
                            + " vm_uuid=" + vm_uuid \
                            + " primary_id=" + str(primary_id)
                        LOG.info(msg)
                        thread_name = self.rc_util.make_thread_name(
                            VM_LIST, primary_id)
                        threading.Thread(
                            target=self.rc_worker.recovery_instance,
                            name=thread_name,
                            args=(vm_uuid, primary_id,
                                  sem_recovery_instance)).start()
                elif retry_mode is True:
                    continue
                else:
                    incomplete_list.append(vm_uuid)

            if incomplete_list:
                vm_list = incomplete_list
                greenthread.sleep(int(recovery_retry_interval))
            else:
                break

        # VMs still unregistered after all retries are inserted directly
        # and their recovery thread is started.
        for vm_uuid in incomplete_list:
            primary_id = self.rc_util_db.insert_vm_list_db(
                session, notification_id, vm_uuid, 0)

            msg = "Run thread rc_worker.recovery_instance." \
                + " vm_uuid=" + vm_uuid \
                + " primary_id=" + str(primary_id)
            LOG.info(msg)
            thread_name = self.rc_util.make_thread_name(
                VM_LIST, primary_id)
            threading.Thread(
                target=self.rc_worker.recovery_instance,
                name=thread_name,
                args=(vm_uuid, primary_id, sem_recovery_instance)).start()

        # update record in notification_list
        self.rc_util_db.update_notification_list_db(
            session, 'progress', 2, notification_id)
        return

    except:
        # Deliberate catch-all: the original KeyError handler and the bare
        # handler were byte-identical, so they are merged. The error is
        # logged with its traceback and never propagated to the caller.
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        LOG.error(error_type)
        LOG.error(error_value)
        for tb in tb_list:
            LOG.error(tb)
        return