def _get_reserve_node_from_reserve_list_db(self, cluster_port,
                                           notification_hostname, session):
    """
    Look up a reserve node and return its host name.

    Calls ``dbapi.get_one_reserve_list_by_cluster_port_for_update`` with
    the given session, cluster port and notification host name, logging
    before and after the call.

    :param cluster_port: cluster port number passed to the query as a
        selection key.
    :param notification_hostname: host name passed to the query
        (presumably the failed host, so it is not picked as its own
        reserve -- TODO confirm against dbapi).
    :param session: database session handed through to dbapi.
    :return: the ``hostname`` attribute of the record returned by the
        query, or None when the query returns nothing (a warning is
        logged) or returns a list/tuple.
    :raises Exception: any error raised during the lookup is logged with
        its traceback and re-raised unchanged.
    """
    # Bind the result up front so every path reaches ``return`` with a
    # defined value.
    hostname = None
    try:
        # Todo(sampath): write the test codes
        #                Check it
        msg = "Do get_one_reserve_list_by_cluster_port_for_update."
        LOG.info(msg)
        cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
            session, cluster_port, notification_hostname
        )
        msg = "Succeeded in get_one_reserve_list_by_cluster_port_for_update. " \
            + "Return_value = " + str(cnt)
        LOG.info(msg)
        if not cnt:
            msg = "The reserve node not exist in reserve_list DB."
            LOG.warning(msg)
        elif not isinstance(cnt, (list, tuple)):
            # Only a single, non-empty record is dereferenced; a falsy
            # result (e.g. None) must not reach ``.hostname``.
            # NOTE(review): add_failed_host pops a row from this same
            # dbapi call's result -- confirm the real return type.
            hostname = cnt.hostname
    except Exception as e:
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        LOG.error(error_type)
        LOG.error(error_value)
        for tb in tb_list:
            LOG.error(tb)
        # ``message`` only exists on Python 2 exceptions; fall back to
        # str(e) so the handler itself cannot fail on Python 3.
        message = getattr(e, "message", None)
        if message is None:
            message = str(e)
        LOG.error(message)
        # Bare raise keeps the original traceback.
        raise
    return hostname
def _get_reserve_node_from_reserve_list_db(self, cluster_port,
                                           notification_hostname, session):
    """
    Look up a reserve node and return its host name.

    Calls ``dbapi.get_one_reserve_list_by_cluster_port_for_update`` with
    the given session, cluster port and notification host name.

    :param cluster_port: cluster port number passed to the query as a
        selection key.
    :param notification_hostname: host name passed to the query
        (presumably the failed host, so it is not picked as its own
        reserve -- TODO confirm against dbapi).
    :param session: database session handed through to dbapi.
    :return: the ``hostname`` attribute of the record returned by the
        query, or None when the query returns nothing (a warning is
        written to syslog) or returns a list/tuple.
    :raises Exception: any error raised during the lookup is written to
        syslog with its traceback and re-raised unchanged.
    """
    # Bind the result up front so every path reaches ``return`` with a
    # defined value.
    hostname = None
    try:
        # Todo(sampath): write the test codes
        #                Check it
        cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
            session, cluster_port, notification_hostname
        )
        if not cnt:
            self.rc_util.syslogout_ex("RecoveryControllerUtilDb_0008",
                                      syslog.LOG_WARNING)
            msg = "The reserve node not exist in reserve_list DB."
            self.rc_util.syslogout(msg, syslog.LOG_WARNING)
        elif not isinstance(cnt, (list, tuple)):
            # Only a single, non-empty record is dereferenced; a falsy
            # result (e.g. None) must not reach ``.hostname``.
            # NOTE(review): add_failed_host pops a row from this same
            # dbapi call's result -- confirm the real return type.
            hostname = cnt.hostname
    except Exception as e:
        self.rc_util.syslogout_ex("RecoveryControllerUtilDb_0010",
                                  syslog.LOG_ERR)
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in tb_list:
            self.rc_util.syslogout(tb, syslog.LOG_ERR)
        # ``message`` only exists on Python 2 exceptions; fall back to
        # str(e) so the handler itself cannot fail on Python 3.
        message = getattr(e, "message", None)
        if message is None:
            message = str(e)
        self.rc_util.syslogout(message, syslog.LOG_ERR)
        # Bare raise keeps the original traceback.
        raise
    return hostname
def _get_reserve_node_from_reserve_list_db(self, cluster_port,
                                           notification_hostname, session):
    """
    Look up a reserve node and return its host name.

    Calls ``dbapi.get_one_reserve_list_by_cluster_port_for_update`` with
    the given session, cluster port and notification host name, logging
    before and after the call.

    :param cluster_port: cluster port number passed to the query as a
        selection key.
    :param notification_hostname: host name passed to the query
        (presumably the failed host, so it is not picked as its own
        reserve -- TODO confirm against dbapi).
    :param session: database session handed through to dbapi.
    :return: the ``hostname`` attribute of the record returned by the
        query, or None when the query returns nothing (a warning is
        logged) or returns a list/tuple.
    :raises Exception: any error raised during the lookup is logged with
        its traceback and re-raised unchanged.
    """
    # Default result; also guarantees the name is bound at ``return``.
    hostname = None
    try:
        # Todo(sampath): write the test codes
        #                Check it
        msg = "Do get_one_reserve_list_by_cluster_port_for_update."
        LOG.info(msg)
        cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
            session, cluster_port, notification_hostname)
        msg = "Succeeded in get_one_reserve_list_by_cluster_port_for_update. " \
            + "Return_value = " + str(cnt)
        LOG.info(msg)
        if not cnt:
            msg = "The reserve node not exist in reserve_list DB."
            LOG.warning(msg)
        elif not isinstance(cnt, (list, tuple)):
            # Reached only for a truthy, non-sequence record, so a None
            # result can no longer hit ``.hostname``.
            # NOTE(review): add_failed_host pops a row from this same
            # dbapi call's result -- confirm the real return type.
            hostname = cnt.hostname
    except Exception as e:
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        LOG.error(error_type)
        LOG.error(error_value)
        for tb in tb_list:
            LOG.error(tb)
        # ``message`` only exists on Python 2 exceptions; fall back to
        # str(e) so the handler itself cannot fail on Python 3.
        message = getattr(e, "message", None)
        if message is None:
            message = str(e)
        LOG.error(message)
        # Bare raise keeps the original traceback.
        raise
    return hostname
def _get_reserve_node_from_reserve_list_db(self, cluster_port,
                                           notification_hostname, session):
    """
    Look up a reserve node and return its host name.

    Calls ``dbapi.get_one_reserve_list_by_cluster_port_for_update`` with
    the given session, cluster port and notification host name.

    :param cluster_port: cluster port number passed to the query as a
        selection key.
    :param notification_hostname: host name passed to the query
        (presumably the failed host, so it is not picked as its own
        reserve -- TODO confirm against dbapi).
    :param session: database session handed through to dbapi.
    :return: the ``hostname`` attribute of the record returned by the
        query, or None when the query returns nothing (a warning is
        written to syslog) or returns a list/tuple.
    :raises Exception: any error raised during the lookup is written to
        syslog with its traceback and re-raised unchanged.
    """
    # Default result; also guarantees the name is bound at ``return``.
    hostname = None
    try:
        # Todo(sampath): write the test codes
        #                Check it
        cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
            session, cluster_port, notification_hostname)
        if not cnt:
            self.rc_util.syslogout_ex("RecoveryControllerUtilDb_0008",
                                      syslog.LOG_WARNING)
            msg = "The reserve node not exist in reserve_list DB."
            self.rc_util.syslogout(msg, syslog.LOG_WARNING)
        elif not isinstance(cnt, (list, tuple)):
            # Reached only for a truthy, non-sequence record, so a None
            # result can no longer hit ``.hostname``.
            # NOTE(review): add_failed_host pops a row from this same
            # dbapi call's result -- confirm the real return type.
            hostname = cnt.hostname
    except Exception as e:
        self.rc_util.syslogout_ex("RecoveryControllerUtilDb_0010",
                                  syslog.LOG_ERR)
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in tb_list:
            self.rc_util.syslogout(tb, syslog.LOG_ERR)
        # ``message`` only exists on Python 2 exceptions; fall back to
        # str(e) so the handler itself cannot fail on Python 3.
        message = getattr(e, "message", None)
        if message is None:
            message = str(e)
        self.rc_util.syslogout(message, syslog.LOG_ERR)
        # Bare raise keeps the original traceback.
        raise
    return hostname
def add_failed_host(self, notification_id, notification_hostname,
                    notification_cluster_port, retry_mode):
    """
    Node recover start thread :
        This thread starts the VM recover execution thread,
        only the number of existing vm in the recovery target node.

    Steps, as implemented below:

    1. Fetch the servers on ``notification_hostname``. If there are
       none, set the notification's ``progress`` to 2 and return.
    2. Read ``recover_to`` from the notification record. When
       ``retry_mode`` is False and no undeleted reserve_list entry
       exists for that host, pick a reserve host by cluster port and
       store it on the notification; if none can be found, set
       ``progress`` to 3 and return.
    3. Mark the reserve_list entry for ``recover_to`` as deleted.
    4. For each VM create a vm_list record and (unless in retry mode)
       start an ``rc_worker.recovery_instance`` thread. VMs for which no
       record id was obtained are retried up to
       ``recovery_max_retry_cnt`` times, sleeping
       ``recovery_retry_interval`` seconds between rounds.
    5. VMs still incomplete after the retries get a record through
       ``insert_vm_list_db`` and a recovery thread anyway.
    6. Set the notification's ``progress`` to 2.

    :param notification_id: The notification ID included in the
        notification
    :param notification_hostname: The host name of the failure node that
        is included in the notification
    :param notification_cluster_port: cluster port used to select a
        reserve host when the notification's ``recover_to`` host has no
        undeleted reserve_list entry
    :param retry_mode: when exactly ``False`` the reserve host is
        resolved; when equal to ``True`` no recovery threads are started
        in the main loop and failed record creations are not retried
    :return: None. Every exception is logged to syslog and swallowed.
    """
    try:
        db_engine = dbapi.get_engine()
        session = dbapi.get_session(db_engine)
        conf_dict = self.rc_config.get_value("recover_starter")
        recovery_max_retry_cnt = conf_dict.get("recovery_max_retry_cnt")
        recovery_retry_interval = conf_dict.get("recovery_retry_interval")
        vm_list = self.rc_util_api.fetch_servers_on_hypervisor(
            notification_hostname)

        # Count vm_list
        if len(vm_list) == 0:
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0014",
                                      syslog.LOG_INFO)
            msg = "There is no instance in " + notification_hostname + "."
            self.rc_util.syslogout(msg, syslog.LOG_INFO)
            # update record in notification_list
            self.rc_util_db.update_notification_list_db(
                session, "progress", 2, notification_id)
            return

        result = dbapi.get_all_notification_list_by_id_for_update(
            session, notification_id)
        recover_to = result.pop().recover_to

        if retry_mode is False:
            cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
                session, recover_to)
            if not cnt:
                # The recorded recover_to host is not an available
                # reserve; pick another one by cluster port.
                cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                    session, notification_cluster_port, notification_hostname
                )
                if not cnt:
                    self.rc_util.syslogout_ex(
                        "RecoveryControllerStarter_0022", syslog.LOG_WARNING)
                    msg = (
                        "The reserve node not exist in "
                        "reserve_list DB, "
                        "so do not recover instances."
                    )
                    self.rc_util.syslogout(msg, syslog.LOG_WARNING)
                    # ``session`` was missing here, unlike the other two
                    # calls in this method, so the update failed and the
                    # error was swallowed by the handler below.
                    self.rc_util_db.update_notification_list_db(
                        session, "progress", 3, notification_id)
                    return
                result = cnt.pop()
                recover_to = result.hostname
                update_at = datetime.datetime.now()
                dbapi.update_notification_list_by_notification_id_recover_to(
                    session, notification_id, update_at, recover_to
                )
                self.rc_util.syslogout_ex(
                    "RecoveryControllerStarter_0024", syslog.LOG_INFO)

        self.rc_util.syslogout_ex("RecoveryControllerStarter_0015",
                                  syslog.LOG_INFO)
        delete_at = datetime.datetime.now()
        dbapi.update_reserve_list_by_hostname_as_deleted(
            session, recover_to, delete_at)

        # create semaphore (Multiplicity is get from config.)
        conf_dict = self.rc_config.get_value("recover_starter")
        sem_recovery_instance = threading.Semaphore(
            int(conf_dict.get("semaphore_multiplicity")))

        incomplete_list = []
        for _ in range(0, int(recovery_max_retry_cnt)):
            # VMs for which no vm_list record id was obtained this round.
            incomplete_list = []
            for vm_uuid in vm_list:
                primary_id = self._create_vm_list_db_for_failed_host(
                    session, notification_id, vm_uuid)
                if primary_id:
                    if retry_mode == True:
                        # Skip recovery_instance thread. Will delegate to
                        # ...
                        msg = (
                            "RETRY MODE. Skip recovery_instance thread"
                            + " vm_uuide=" + vm_uuid
                            + " notification_id=" + notification_id
                        )
                        self.rc_util.syslogout(msg, syslog.LOG_INFO)
                    else:
                        msg = (
                            "Run thread rc_worker.recovery_instance."
                            + " vm_uuid=" + vm_uuid
                            + " primary_id=" + str(primary_id)
                        )
                        self.rc_util.syslogout(msg, syslog.LOG_INFO)
                        threading.Thread(
                            target=self.rc_worker.recovery_instance,
                            args=(vm_uuid, primary_id, sem_recovery_instance),
                        ).start()
                else:
                    if retry_mode == True:
                        continue
                    else:
                        incomplete_list.append(vm_uuid)

            if incomplete_list:
                # Retry only the VMs that failed, after a pause.
                vm_list = incomplete_list
                greenthread.sleep(int(recovery_retry_interval))
            else:
                break

        # Retries exhausted: insert a record for each remaining VM and
        # start its recovery thread.
        for vm_uuid in incomplete_list:
            primary_id = self.rc_util_db.insert_vm_list_db(
                session, notification_id, vm_uuid, 0)
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0031",
                                      syslog.LOG_INFO)
            msg = (
                "Run thread rc_worker.recovery_instance."
                + " vm_uuid=" + vm_uuid
                + " primary_id=" + str(primary_id)
            )
            self.rc_util.syslogout(msg, syslog.LOG_INFO)
            threading.Thread(
                target=self.rc_worker.recovery_instance,
                args=(vm_uuid, primary_id, sem_recovery_instance)
            ).start()

        # update record in notification_list
        self.rc_util_db.update_notification_list_db(
            session, "progress", 2, notification_id)
        return

    except KeyError:
        self.rc_util.syslogout_ex("RecoveryControllerStarter_0017",
                                  syslog.LOG_ERR)
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in tb_list:
            self.rc_util.syslogout(tb, syslog.LOG_ERR)
        return
    except:
        # Catch-all kept as in the original: nothing propagates out of
        # this method; the failure is only logged.
        self.rc_util.syslogout_ex("RecoveryControllerStarter_0018",
                                  syslog.LOG_ERR)
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in tb_list:
            self.rc_util.syslogout(tb, syslog.LOG_ERR)
        return