Example #1
0
    def add_failed_host(self, notification_id, notification_hostname,
                        notification_cluster_port, retry_mode):
        """
        Node recover start thread :
            This thread starts the VM recover execution thread,
            only the number of existing vm in the recovery target node.

        Every exception raised inside is caught and logged here; nothing
        propagates to the caller and the method always returns None.

        :param notification_id: The notification ID included in the
         notification
        :param notification_hostname: The host name of the failure node that
         is included in the notification
        :param notification_cluster_port: The cluster port included in the
         notification; used to look up a reserve node when
         get_all_reserve_list_by_hostname_not_deleted returns nothing for
         the notification's recover_to host
        :param retry_mode: When True, the reserve node lookup is skipped and
         no recovery_instance thread is started for instances whose vm_list
         record was created
        """

        try:
            self.rc_config.set_request_context()
            db_engine = dbapi.get_engine(self.rc_config)
            session = dbapi.get_session(db_engine)
            conf_dict = self.rc_config.get_value('recover_starter')
            recovery_max_retry_cnt = conf_dict.get('recovery_max_retry_cnt')
            recovery_retry_interval = conf_dict.get('recovery_retry_interval')

            vm_list = self.rc_util_api.fetch_servers_on_hypervisor(
                notification_hostname)

            # Count vm_list
            if len(vm_list) == 0:
                msg = "There is no instance in " + notification_hostname + "."
                LOG.info(msg)

                # update record in notification_list
                self.rc_util_db.update_notification_list_db(
                    session, 'progress', 2, notification_id)

                return
            else:
                msg = "Do get_all_notification_list_by_id_for_update."
                LOG.info(msg)
                result = dbapi.get_all_notification_list_by_id_for_update(
                    session, notification_id)
                msg = "Succeeded in " \
                    + "get_all_notification_list_by_id_for_update. " \
                    + "Return_value = " + str(result)
                LOG.info(msg)
                recover_to = result.pop().recover_to

                if retry_mode is False:
                    msg = "Do get_all_reserve_list_by_hostname_not_deleted."
                    LOG.info(msg)
                    cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
                        session, recover_to)
                    msg = "Succeeded in " \
                        + "get_all_reserve_list_by_hostname_not_deleted. " \
                        + "Return_value = " + str(cnt)
                    LOG.info(msg)

                    if not cnt:
                        # The recorded recover_to host is not usable; fall
                        # back to a reserve node looked up by cluster port.
                        msg = "Do " \
                            + "get_one_reserve_list_by_cluster_port_for_update."
                        LOG.info(msg)
                        cnt = dbapi.\
                            get_one_reserve_list_by_cluster_port_for_update(
                                session,
                                notification_cluster_port,
                                notification_hostname
                            )
                        msg = "Succeeded in " \
                            + "get_one_reserve_list_by_cluster_port_for_update. " \
                            + "Return_value = " + str(cnt)
                        LOG.info(msg)

                        if not cnt:
                            msg = "The reserve node not exist in " \
                                  "reserve_list DB, " \
                                  "so do not recover instances."
                            LOG.warning(msg)
                            # Pass the session like every other call site;
                            # without it the call fails and the progress
                            # value is never stored.
                            self.rc_util_db.update_notification_list_db(
                                session, 'progress', 3, notification_id)

                            return

                        result = cnt.pop()
                        recover_to = result.hostname
                        update_at = datetime.datetime.now()
                        msg = "Do " \
                            + "update_notification_list_by_notification_id_recover_to."
                        LOG.info(msg)
                        dbapi.update_notification_list_by_notification_id_recover_to(
                            session, notification_id, update_at, recover_to)
                        msg = "Succeeded in " \
                            + "update_notification_list_by_notification_id_recover_to."
                        LOG.info(msg)

                delete_at = datetime.datetime.now()

                msg = "Do update_reserve_list_by_hostname_as_deleted."
                LOG.info(msg)
                dbapi.update_reserve_list_by_hostname_as_deleted(
                    session, recover_to, delete_at)
                msg = "Succeeded in " \
                    + "update_reserve_list_by_hostname_as_deleted."
                LOG.info(msg)
            # create semaphore (Multiplicity is get from config.)
            conf_dict = self.rc_config.get_value('recover_starter')
            sem_recovery_instance = threading.Semaphore(
                int(conf_dict.get('semaphore_multiplicity')))

            # Each pass retries only the instances whose vm_list record
            # could not be created on the previous pass.
            incomplete_list = []
            for _ in range(0, int(recovery_max_retry_cnt)):
                incomplete_list = []

                for vm_uuid in vm_list:
                    primary_id = self._create_vm_list_db_for_failed_host(
                        session, notification_id, vm_uuid)

                    if primary_id:
                        if retry_mode is True:
                            # Skip recovery_instance thread. Will delegate to
                            # ...
                            msg = "RETRY MODE. Skip recovery_instance thread" \
                                + " vm_uuide=" + vm_uuid \
                                + " notification_id=" + notification_id
                            LOG.info(msg)
                        else:
                            msg = "Run thread rc_worker.recovery_instance." \
                                + " vm_uuid=" + vm_uuid \
                                + " primary_id=" + str(primary_id)
                            LOG.info(msg)

                            thread_name = self.rc_util.make_thread_name(
                                VM_LIST, primary_id)
                            threading.Thread(
                                target=self.rc_worker.recovery_instance,
                                name=thread_name,
                                args=(vm_uuid, primary_id,
                                      sem_recovery_instance)).start()
                    else:
                        if retry_mode is True:
                            continue
                        else:
                            incomplete_list.append(vm_uuid)

                if incomplete_list:
                    vm_list = incomplete_list
                    greenthread.sleep(int(recovery_retry_interval))
                else:
                    break

            # Instances still without a primary id after the last pass:
            # insert the vm_list record directly and start the recovery
            # thread for each of them.
            for vm_uuid in incomplete_list:
                primary_id = self.rc_util_db.insert_vm_list_db(
                    session, notification_id, vm_uuid, 0)

                msg = "Run thread rc_worker.recovery_instance." \
                    + " vm_uuid=" + vm_uuid \
                    + " primary_id=" + str(primary_id)
                LOG.info(msg)
                thread_name = self.rc_util.make_thread_name(
                    VM_LIST, primary_id)
                threading.Thread(target=self.rc_worker.recovery_instance,
                                 name=thread_name,
                                 args=(vm_uuid, primary_id,
                                       sem_recovery_instance)).start()

            # update record in notification_list
            self.rc_util_db.update_notification_list_db(
                session, 'progress', 2, notification_id)

            return

        except KeyError:
            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            LOG.error(error_type)
            LOG.error(error_value)
            for tb in tb_list:
                LOG.error(tb)
            return
        except:
            # Deliberately broad: failures are logged, never propagated
            # (presumably because this runs as a thread body).
            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            LOG.error(error_type)
            LOG.error(error_value)
            for tb in tb_list:
                LOG.error(tb)
            return
Example #2
0
    def add_failed_host(self, notification_id, notification_hostname, notification_cluster_port, retry_mode):
        """
        Node recover start thread :
            This thread starts the VM recover execution thread,
            only the number of existing vm in the recovery target node.

        Every exception raised inside is caught and written to syslog here;
        nothing propagates to the caller and the method always returns None.

        :param notification_id: The notification ID included in the
         notification
        :param notification_hostname: The host name of the failure node that
         is included in the notification
        :param notification_cluster_port: The cluster port included in the
         notification; used to look up a reserve node when
         get_all_reserve_list_by_hostname_not_deleted returns nothing for
         the notification's recover_to host
        :param retry_mode: When true, the reserve node lookup is skipped and
         no recovery_instance thread is started for instances whose vm_list
         record was created
        """

        try:
            db_engine = dbapi.get_engine()
            session = dbapi.get_session(db_engine)
            conf_dict = self.rc_config.get_value("recover_starter")
            recovery_max_retry_cnt = conf_dict.get("recovery_max_retry_cnt")
            recovery_retry_interval = conf_dict.get("recovery_retry_interval")

            vm_list = self.rc_util_api.fetch_servers_on_hypervisor(notification_hostname)

            # Count vm_list
            if len(vm_list) == 0:
                self.rc_util.syslogout_ex("RecoveryControllerStarter_0014", syslog.LOG_INFO)
                msg = "There is no instance in " + notification_hostname + "."
                self.rc_util.syslogout(msg, syslog.LOG_INFO)

                # update record in notification_list
                self.rc_util_db.update_notification_list_db(session, "progress", 2, notification_id)

                return
            else:
                result = dbapi.get_all_notification_list_by_id_for_update(session, notification_id)
                recover_to = result.pop().recover_to

                if retry_mode is False:
                    cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(session, recover_to)

                    if not cnt:
                        # The recorded recover_to host is not usable; fall
                        # back to a reserve node looked up by cluster port.
                        cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                            session, notification_cluster_port, notification_hostname
                        )

                        if not cnt:
                            self.rc_util.syslogout_ex("RecoveryControllerStarter_0022", syslog.LOG_WARNING)
                            msg = "The reserve node not exist in " "reserve_list DB, " "so do not recover instances."
                            self.rc_util.syslogout(msg, syslog.LOG_WARNING)
                            # Pass the session like every other call site;
                            # without it the call fails and the progress
                            # value is never stored.
                            self.rc_util_db.update_notification_list_db(session, "progress", 3, notification_id)
                            return

                        result = cnt.pop()
                        recover_to = result.hostname
                        update_at = datetime.datetime.now()
                        dbapi.update_notification_list_by_notification_id_recover_to(
                            session, notification_id, update_at, recover_to
                        )

                        self.rc_util.syslogout_ex("RecoveryControllerStarter_0024", syslog.LOG_INFO)
                self.rc_util.syslogout_ex("RecoveryControllerStarter_0015", syslog.LOG_INFO)

                delete_at = datetime.datetime.now()
                dbapi.update_reserve_list_by_hostname_as_deleted(session, recover_to, delete_at)
            # create semaphore (Multiplicity is get from config.)
            conf_dict = self.rc_config.get_value("recover_starter")
            sem_recovery_instance = threading.Semaphore(int(conf_dict.get("semaphore_multiplicity")))

            # Each pass retries only the instances whose vm_list record
            # could not be created on the previous pass.
            incomplete_list = []
            for _ in range(0, int(recovery_max_retry_cnt)):
                incomplete_list = []

                for vm_uuid in vm_list:
                    primary_id = self._create_vm_list_db_for_failed_host(session, notification_id, vm_uuid)

                    if primary_id:
                        if retry_mode == True:
                            # Skip recovery_instance thread. Will delegate to
                            # ...
                            msg = (
                                "RETRY MODE. Skip recovery_instance thread"
                                + " vm_uuide="
                                + vm_uuid
                                + " notification_id="
                                + notification_id
                            )
                            self.rc_util.syslogout(msg, syslog.LOG_INFO)
                        else:
                            msg = (
                                "Run thread rc_worker.recovery_instance."
                                + " vm_uuid="
                                + vm_uuid
                                + " primary_id="
                                + str(primary_id)
                            )
                            self.rc_util.syslogout(msg, syslog.LOG_INFO)

                            threading.Thread(
                                target=self.rc_worker.recovery_instance,
                                args=(vm_uuid, primary_id, sem_recovery_instance),
                            ).start()
                    else:
                        if retry_mode == True:
                            continue
                        else:
                            incomplete_list.append(vm_uuid)

                if incomplete_list:
                    vm_list = incomplete_list
                    greenthread.sleep(int(recovery_retry_interval))
                else:
                    break

            # Instances still without a primary id after the last pass:
            # insert the vm_list record directly and start the recovery
            # thread for each of them.
            for vm_uuid in incomplete_list:
                primary_id = self.rc_util_db.insert_vm_list_db(session, notification_id, vm_uuid, 0)

                self.rc_util.syslogout_ex("RecoveryControllerStarter_0031", syslog.LOG_INFO)
                msg = (
                    "Run thread rc_worker.recovery_instance." + " vm_uuid=" + vm_uuid + " primary_id=" + str(primary_id)
                )
                self.rc_util.syslogout(msg, syslog.LOG_INFO)
                threading.Thread(
                    target=self.rc_worker.recovery_instance, args=(vm_uuid, primary_id, sem_recovery_instance)
                ).start()

            # update record in notification_list
            self.rc_util_db.update_notification_list_db(session, "progress", 2, notification_id)

            return

        except KeyError:
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0017", syslog.LOG_ERR)
            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            self.rc_util.syslogout(error_type, syslog.LOG_ERR)
            self.rc_util.syslogout(error_value, syslog.LOG_ERR)
            for tb in tb_list:
                self.rc_util.syslogout(tb, syslog.LOG_ERR)
            return
        except:
            # Deliberately broad: failures are logged, never propagated
            # (presumably because this runs as a thread body).
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0018", syslog.LOG_ERR)
            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            self.rc_util.syslogout(error_type, syslog.LOG_ERR)
            self.rc_util.syslogout(error_value, syslog.LOG_ERR)
            for tb in tb_list:
                self.rc_util.syslogout(tb, syslog.LOG_ERR)
            return
Example #3
0
    def add_failed_host(self, notification_id, notification_hostname,
                        notification_cluster_port, retry_mode):
        """
        Node recover start thread :
            This thread starts the VM recover execution thread,
            only the number of existing vm in the recovery target node.

        Every exception raised inside is caught and written to syslog here;
        nothing propagates to the caller and the method always returns None.

        :param notification_id: The notification ID included in the
         notification
        :param notification_hostname: The host name of the failure node that
         is included in the notification
        :param notification_cluster_port: The cluster port included in the
         notification; used to look up a reserve node when
         get_all_reserve_list_by_hostname_not_deleted returns nothing for
         the notification's recover_to host
        :param retry_mode: When true, the reserve node lookup is skipped and
         no recovery_instance thread is started for instances whose vm_list
         record was created
        """

        try:
            db_engine = dbapi.get_engine()
            session = dbapi.get_session(db_engine)
            conf_dict = self.rc_config.get_value('recover_starter')
            recovery_max_retry_cnt = conf_dict.get('recovery_max_retry_cnt')
            recovery_retry_interval = conf_dict.get('recovery_retry_interval')

            vm_list = self.rc_util_api.fetch_servers_on_hypervisor(
                notification_hostname)

            # Count vm_list
            if len(vm_list) == 0:
                self.rc_util.syslogout_ex("RecoveryControllerStarter_0014",
                                          syslog.LOG_INFO)
                msg = "There is no instance in " + notification_hostname + "."
                self.rc_util.syslogout(msg, syslog.LOG_INFO)

                # update record in notification_list
                self.rc_util_db.update_notification_list_db(
                    session, 'progress', 2, notification_id)

                return
            else:
                result = dbapi.get_all_notification_list_by_id_for_update(
                    session, notification_id)
                recover_to = result.pop().recover_to

                if retry_mode is False:
                    cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
                        session, recover_to)

                    if not cnt:
                        # The recorded recover_to host is not usable; fall
                        # back to a reserve node looked up by cluster port.
                        cnt = dbapi.\
                            get_one_reserve_list_by_cluster_port_for_update(
                                session,
                                notification_cluster_port,
                                notification_hostname
                            )

                        if not cnt:
                            self.rc_util.syslogout_ex(
                                "RecoveryControllerStarter_0022",
                                syslog.LOG_WARNING)
                            msg = "The reserve node not exist in " \
                                  "reserve_list DB, " \
                                  "so do not recover instances."
                            self.rc_util.syslogout(msg, syslog.LOG_WARNING)
                            # Pass the session like every other call site;
                            # without it the call fails and the progress
                            # value is never stored.
                            self.rc_util_db.update_notification_list_db(
                                session, 'progress', 3, notification_id)
                            return

                        result = cnt.pop()
                        recover_to = result.hostname
                        update_at = datetime.datetime.now()
                        dbapi.update_notification_list_by_notification_id_recover_to(
                            session, notification_id, update_at, recover_to)

                        self.rc_util.syslogout_ex(
                            "RecoveryControllerStarter_0024", syslog.LOG_INFO)
                self.rc_util.syslogout_ex("RecoveryControllerStarter_0015",
                                          syslog.LOG_INFO)

                delete_at = datetime.datetime.now()
                dbapi.update_reserve_list_by_hostname_as_deleted(
                    session, recover_to, delete_at)
            # create semaphore (Multiplicity is get from config.)
            conf_dict = self.rc_config.get_value('recover_starter')
            sem_recovery_instance = threading.Semaphore(
                int(conf_dict.get('semaphore_multiplicity')))

            # Each pass retries only the instances whose vm_list record
            # could not be created on the previous pass.
            incomplete_list = []
            for _ in range(0, int(recovery_max_retry_cnt)):
                incomplete_list = []

                for vm_uuid in vm_list:
                    primary_id = self._create_vm_list_db_for_failed_host(
                        session, notification_id, vm_uuid)

                    if primary_id:
                        if retry_mode == True:
                            # Skip recovery_instance thread. Will delegate to
                            # ...
                            msg = "RETRY MODE. Skip recovery_instance thread" \
                                + " vm_uuide=" + vm_uuid \
                                + " notification_id=" + notification_id
                            self.rc_util.syslogout(msg, syslog.LOG_INFO)
                        else:
                            msg = "Run thread rc_worker.recovery_instance." \
                                + " vm_uuid=" + vm_uuid \
                                + " primary_id=" + str(primary_id)
                            self.rc_util.syslogout(msg, syslog.LOG_INFO)

                            threading.Thread(
                                target=self.rc_worker.recovery_instance,
                                args=(vm_uuid, primary_id,
                                      sem_recovery_instance)).start()
                    else:
                        if retry_mode == True:
                            continue
                        else:
                            incomplete_list.append(vm_uuid)

                if incomplete_list:
                    vm_list = incomplete_list
                    greenthread.sleep(int(recovery_retry_interval))
                else:
                    break

            # Instances still without a primary id after the last pass:
            # insert the vm_list record directly and start the recovery
            # thread for each of them.
            for vm_uuid in incomplete_list:
                primary_id = self.rc_util_db.insert_vm_list_db(
                    session, notification_id, vm_uuid, 0)

                self.rc_util.syslogout_ex("RecoveryControllerStarter_0031",
                                          syslog.LOG_INFO)
                msg = "Run thread rc_worker.recovery_instance." \
                    + " vm_uuid=" + vm_uuid \
                    + " primary_id=" + str(primary_id)
                self.rc_util.syslogout(msg, syslog.LOG_INFO)
                threading.Thread(target=self.rc_worker.recovery_instance,
                                 args=(vm_uuid, primary_id,
                                       sem_recovery_instance)).start()

            # update record in notification_list
            self.rc_util_db.update_notification_list_db(
                session, 'progress', 2, notification_id)

            return

        except KeyError:
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0017",
                                      syslog.LOG_ERR)
            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            self.rc_util.syslogout(error_type, syslog.LOG_ERR)
            self.rc_util.syslogout(error_value, syslog.LOG_ERR)
            for tb in tb_list:
                self.rc_util.syslogout(tb, syslog.LOG_ERR)
            return
        except:
            # Deliberately broad: failures are logged, never propagated
            # (presumably because this runs as a thread body).
            self.rc_util.syslogout_ex("RecoveryControllerStarter_0018",
                                      syslog.LOG_ERR)
            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            self.rc_util.syslogout(error_type, syslog.LOG_ERR)
            self.rc_util.syslogout(error_value, syslog.LOG_ERR)
            for tb in tb_list:
                self.rc_util.syslogout(tb, syslog.LOG_ERR)
            return
Example #4
0
    def add_failed_host(self,
                        notification_id,
                        notification_hostname,
                        notification_cluster_port,
                        retry_mode):
        """
        Node recover start thread :
            This thread starts the VM recover execution thread,
            only the number of existing vm in the recovery target node.

        Every exception raised inside is caught and logged here; nothing
        propagates to the caller and the method always returns None.

        :param notification_id: The notification ID included in the
         notification
        :param notification_hostname: The host name of the failure node that
         is included in the notification
        :param notification_cluster_port: The cluster port included in the
         notification; used to look up a reserve node when
         get_all_reserve_list_by_hostname_not_deleted returns nothing for
         the notification's recover_to host
        :param retry_mode: When True, the reserve node lookup is skipped and
         no recovery_instance thread is started for instances whose vm_list
         record was created
        """

        try:
            self.rc_config.set_request_context()
            db_engine = dbapi.get_engine(self.rc_config)
            session = dbapi.get_session(db_engine)
            conf_dict = self.rc_config.get_value('recover_starter')
            recovery_max_retry_cnt = conf_dict.get('recovery_max_retry_cnt')
            recovery_retry_interval = conf_dict.get('recovery_retry_interval')

            vm_list = self.rc_util_api.fetch_servers_on_hypervisor(
                notification_hostname)

            # Count vm_list
            if len(vm_list) == 0:
                msg = "There is no instance in " + notification_hostname + "."
                LOG.info(msg)

                # update record in notification_list
                self.rc_util_db.update_notification_list_db(
                    session, 'progress', 2, notification_id)

                return
            else:
                msg = "Do get_all_notification_list_by_id_for_update."
                LOG.info(msg)
                result = dbapi.get_all_notification_list_by_id_for_update(
                    session, notification_id)
                msg = "Succeeded in " \
                    + "get_all_notification_list_by_id_for_update. " \
                    + "Return_value = " + str(result)
                LOG.info(msg)
                recover_to = result.pop().recover_to

                if retry_mode is False:
                    msg = "Do get_all_reserve_list_by_hostname_not_deleted."
                    LOG.info(msg)
                    cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(
                        session,
                        recover_to)
                    msg = "Succeeded in " \
                        + "get_all_reserve_list_by_hostname_not_deleted. " \
                        + "Return_value = " + str(cnt)
                    LOG.info(msg)

                    if not cnt:
                        # The recorded recover_to host is not usable; fall
                        # back to a reserve node looked up by cluster port.
                        msg = "Do " \
                            + "get_one_reserve_list_by_cluster_port_for_update."
                        LOG.info(msg)
                        cnt = dbapi.\
                            get_one_reserve_list_by_cluster_port_for_update(
                                session,
                                notification_cluster_port,
                                notification_hostname
                            )
                        msg = "Succeeded in " \
                            + "get_one_reserve_list_by_cluster_port_for_update. " \
                            + "Return_value = " + str(cnt)
                        LOG.info(msg)

                        if not cnt:
                            msg = "The reserve node not exist in " \
                                  "reserve_list DB, " \
                                  "so do not recover instances."
                            LOG.warning(msg)
                            # Pass the session like every other call site;
                            # without it the call fails and the progress
                            # value is never stored.
                            self.rc_util_db.update_notification_list_db(
                                session, 'progress', 3, notification_id)

                            return

                        result = cnt.pop()
                        recover_to = result.hostname
                        update_at = datetime.datetime.now()
                        msg = "Do " \
                            + "update_notification_list_by_notification_id_recover_to."
                        LOG.info(msg)
                        dbapi.update_notification_list_by_notification_id_recover_to(
                            session,
                            notification_id,
                            update_at,
                            recover_to
                        )
                        msg = "Succeeded in " \
                            + "update_notification_list_by_notification_id_recover_to."
                        LOG.info(msg)

                delete_at = datetime.datetime.now()

                msg = "Do update_reserve_list_by_hostname_as_deleted."
                LOG.info(msg)
                dbapi.update_reserve_list_by_hostname_as_deleted(
                    session, recover_to, delete_at)
                msg = "Succeeded in " \
                    + "update_reserve_list_by_hostname_as_deleted."
                LOG.info(msg)
            # create semaphore (Multiplicity is get from config.)
            conf_dict = self.rc_config.get_value('recover_starter')
            sem_recovery_instance = threading.Semaphore(
                int(conf_dict.get('semaphore_multiplicity')))

            # Each pass retries only the instances whose vm_list record
            # could not be created on the previous pass.
            incomplete_list = []
            for _ in range(0, int(recovery_max_retry_cnt)):
                incomplete_list = []

                for vm_uuid in vm_list:
                    primary_id = self._create_vm_list_db_for_failed_host(
                        session, notification_id, vm_uuid)

                    if primary_id:
                        if retry_mode is True:
                            # Skip recovery_instance thread. Will delegate to
                            # ...
                            msg = "RETRY MODE. Skip recovery_instance thread" \
                                + " vm_uuide=" + vm_uuid \
                                + " notification_id=" + notification_id
                            LOG.info(msg)
                        else:
                            msg = "Run thread rc_worker.recovery_instance." \
                                + " vm_uuid=" + vm_uuid \
                                + " primary_id=" + str(primary_id)
                            LOG.info(msg)

                            thread_name = self.rc_util.make_thread_name(
                                VM_LIST, primary_id)
                            threading.Thread(
                                target=self.rc_worker.recovery_instance,
                                name=thread_name,
                                args=(vm_uuid, primary_id,
                                      sem_recovery_instance)).start()
                    else:
                        if retry_mode is True:
                            continue
                        else:
                            incomplete_list.append(vm_uuid)

                if incomplete_list:
                    vm_list = incomplete_list
                    greenthread.sleep(int(recovery_retry_interval))
                else:
                    break

            # Instances still without a primary id after the last pass:
            # insert the vm_list record directly and start the recovery
            # thread for each of them.
            for vm_uuid in incomplete_list:
                primary_id = self.rc_util_db.insert_vm_list_db(
                    session, notification_id, vm_uuid, 0)

                msg = "Run thread rc_worker.recovery_instance." \
                    + " vm_uuid=" + vm_uuid \
                    + " primary_id=" + str(primary_id)
                LOG.info(msg)
                thread_name = self.rc_util.make_thread_name(
                    VM_LIST, primary_id)
                threading.Thread(
                    target=self.rc_worker.recovery_instance,
                    name=thread_name,
                    args=(vm_uuid, primary_id,
                          sem_recovery_instance)).start()

            # update record in notification_list
            self.rc_util_db.update_notification_list_db(
                session, 'progress', 2, notification_id)

            return

        except KeyError:
            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            LOG.error(error_type)
            LOG.error(error_value)
            for tb in tb_list:
                LOG.error(tb)
            return
        except:
            # Deliberately broad: failures are logged, never propagated
            # (presumably because this runs as a thread body).
            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            LOG.error(error_type)
            LOG.error(error_value)
            for tb in tb_list:
                LOG.error(tb)
            return