예제 #1
0
    def _get_reserve_node_from_reserve_list_db(self,
                                               cluster_port,
                                               notification_hostname,
                                               session):
        """
        Look up one reserve (spare) node and return its host name.

        Calls ``dbapi.get_one_reserve_list_by_cluster_port_for_update`` with
        the given session, cluster port and notification host name, and
        extracts ``hostname`` from the result.

        :param cluster_port: select key, cluster port number.
        :param notification_hostname: host name forwarded to the reserve_list
            query (presumably the failed host to exclude -- confirm against
            dbapi).
        :param session: database session object handed to dbapi.
        :return: hostname of the reserve node record, or None when the
            query returned nothing.
        :raises Exception: any error raised during the lookup is logged
            (type, value, traceback) and re-raised unchanged.
        """

        # Bound up front so every path through the try block has a value.
        hostname = None

        try:
            # Todo(sampath): write the test codes
            #                Check it
            msg = "Do get_one_reserve_list_by_cluster_port_for_update."
            LOG.info(msg)
            cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                session,
                cluster_port,
                notification_hostname
            )
            msg = "Succeeded in get_one_reserve_list_by_cluster_port_for_update. " \
                + "Return_value = " + str(cnt)
            LOG.info(msg)

            if not cnt:
                # Covers both None and an empty list/tuple.
                msg = "The reserve node not exist in reserve_list DB."
                LOG.warning(msg)
            elif isinstance(cnt, (list, tuple)):
                # The query may hand back a sequence of rows; use the first.
                hostname = cnt[0].hostname
            else:
                hostname = cnt.hostname

        except Exception as e:

            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            LOG.error(error_type)
            LOG.error(error_value)
            for tb in tb_list:
                LOG.error(tb)

            # Exceptions have no ``message`` attribute on Python 3.
            LOG.error(getattr(e, "message", str(e)))

            # Bare raise keeps the original traceback intact.
            raise

        return hostname
예제 #2
0
    def _get_reserve_node_from_reserve_list_db(self,
                                               cluster_port,
                                               notification_hostname,
                                               session):
        """
        Look up one reserve (spare) node and return its host name.

        Calls ``dbapi.get_one_reserve_list_by_cluster_port_for_update`` with
        the given session, cluster port and notification host name, and
        extracts ``hostname`` from the result.

        :param cluster_port: select key, cluster port number.
        :param notification_hostname: host name forwarded to the reserve_list
            query (presumably the failed host to exclude -- confirm against
            dbapi).
        :param session: database session object handed to dbapi.
        :return: hostname of the reserve node record, or None when the
            query returned nothing.
        :raises Exception: any error raised during the lookup is written to
            syslog (type, value, traceback) and re-raised unchanged.
        """

        # Bound up front so every path through the try block has a value.
        hostname = None

        try:
            # Todo(sampath): write the test codes
            #                Check it
            cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                session,
                cluster_port,
                notification_hostname
            )
            if not cnt:
                # Covers both None and an empty list/tuple.
                self.rc_util.syslogout_ex("RecoveryControllerUtilDb_0008",
                                          syslog.LOG_WARNING)
                msg = "The reserve node not exist in reserve_list DB."
                self.rc_util.syslogout(msg, syslog.LOG_WARNING)
            elif isinstance(cnt, (list, tuple)):
                # The query may hand back a sequence of rows; use the first.
                hostname = cnt[0].hostname
            else:
                hostname = cnt.hostname

        except Exception as e:

            self.rc_util.syslogout_ex("RecoveryControllerUtilDb_0010",
                                      syslog.LOG_ERR)
            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            self.rc_util.syslogout(error_type, syslog.LOG_ERR)
            self.rc_util.syslogout(error_value, syslog.LOG_ERR)
            for tb in tb_list:
                self.rc_util.syslogout(tb, syslog.LOG_ERR)

            # Exceptions have no ``message`` attribute on Python 3.
            self.rc_util.syslogout(getattr(e, "message", str(e)),
                                   syslog.LOG_ERR)

            # Bare raise keeps the original traceback intact.
            raise

        return hostname
예제 #3
0
    def _get_reserve_node_from_reserve_list_db(self, cluster_port,
                                               notification_hostname, session):
        """
        Look up one reserve (spare) node and return its host name.

        Calls ``dbapi.get_one_reserve_list_by_cluster_port_for_update`` with
        the given session, cluster port and notification host name, and
        extracts ``hostname`` from the result.

        :param cluster_port: select key, cluster port number.
        :param notification_hostname: host name forwarded to the reserve_list
            query (presumably the failed host to exclude -- confirm against
            dbapi).
        :param session: database session object handed to dbapi.
        :return: hostname of the reserve node record, or None when the
            query returned nothing.
        :raises Exception: any error raised during the lookup is logged
            (type, value, traceback) and re-raised unchanged.
        """

        # Bound up front so every path through the try block has a value.
        hostname = None

        try:
            # Todo(sampath): write the test codes
            #                Check it
            msg = "Do get_one_reserve_list_by_cluster_port_for_update."
            LOG.info(msg)
            cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                session, cluster_port, notification_hostname)
            msg = "Succeeded in get_one_reserve_list_by_cluster_port_for_update. " \
                + "Return_value = " + str(cnt)
            LOG.info(msg)

            if not cnt:
                # Covers both None and an empty list/tuple.
                msg = "The reserve node not exist in reserve_list DB."
                LOG.warning(msg)
            elif isinstance(cnt, (list, tuple)):
                # The query may hand back a sequence of rows; use the first.
                hostname = cnt[0].hostname
            else:
                hostname = cnt.hostname

        except Exception as e:

            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            LOG.error(error_type)
            LOG.error(error_value)
            for tb in tb_list:
                LOG.error(tb)

            # Exceptions have no ``message`` attribute on Python 3.
            LOG.error(getattr(e, "message", str(e)))

            # Bare raise keeps the original traceback intact.
            raise

        return hostname
예제 #4
0
    def _get_reserve_node_from_reserve_list_db(self, cluster_port,
                                               notification_hostname, session):
        """
        Look up one reserve (spare) node and return its host name.

        Calls ``dbapi.get_one_reserve_list_by_cluster_port_for_update`` with
        the given session, cluster port and notification host name, and
        extracts ``hostname`` from the result.

        :param cluster_port: select key, cluster port number.
        :param notification_hostname: host name forwarded to the reserve_list
            query (presumably the failed host to exclude -- confirm against
            dbapi).
        :param session: database session object handed to dbapi.
        :return: hostname of the reserve node record, or None when the
            query returned nothing.
        :raises Exception: any error raised during the lookup is written to
            syslog (type, value, traceback) and re-raised unchanged.
        """

        # Bound up front so every path through the try block has a value.
        hostname = None

        try:
            # Todo(sampath): write the test codes
            #                Check it
            cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                session, cluster_port, notification_hostname)
            if not cnt:
                # Covers both None and an empty list/tuple.
                self.rc_util.syslogout_ex("RecoveryControllerUtilDb_0008",
                                          syslog.LOG_WARNING)
                msg = "The reserve node not exist in reserve_list DB."
                self.rc_util.syslogout(msg, syslog.LOG_WARNING)
            elif isinstance(cnt, (list, tuple)):
                # The query may hand back a sequence of rows; use the first.
                hostname = cnt[0].hostname
            else:
                hostname = cnt.hostname

        except Exception as e:

            self.rc_util.syslogout_ex("RecoveryControllerUtilDb_0010",
                                      syslog.LOG_ERR)
            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            self.rc_util.syslogout(error_type, syslog.LOG_ERR)
            self.rc_util.syslogout(error_value, syslog.LOG_ERR)
            for tb in tb_list:
                self.rc_util.syslogout(tb, syslog.LOG_ERR)

            # Exceptions have no ``message`` attribute on Python 3.
            self.rc_util.syslogout(getattr(e, "message", str(e)),
                                   syslog.LOG_ERR)

            # Bare raise keeps the original traceback intact.
            raise

        return hostname
예제 #5
0
    def add_failed_host(self, notification_id, notification_hostname, notification_cluster_port, retry_mode):
        """
        Node recover start thread :
            This thread starts the VM recover execution thread,
            only the number of existing vm in the recovery target node.

        Flow, as implemented below:
          1. Fetch the servers on the failed hypervisor; when there are
             none, mark the notification with progress 2 and stop.
          2. Unless ``retry_mode`` is True, make sure a reserve host is
             assigned (picking one by cluster port when the recorded
             ``recover_to`` is not an active reserve); when none exists,
             mark the notification with progress 3 and stop.
          3. Mark the chosen reserve host as deleted in reserve_list.
          4. Register every VM and start one ``rc_worker.recovery_instance``
             thread per VM, retrying registration up to
             ``recovery_max_retry_cnt`` times.
          5. Mark the notification with progress 2.

        All exceptions are logged to syslog and swallowed; the method
        always returns None.

        :param notification_id: The notification ID included in the
         notification
        :param notification_hostname: The host name of the failure node that
         is included in the notification
        :param notification_cluster_port: Cluster port used as the select
         key when a reserve host has to be picked from reserve_list
        :param retry_mode: When True, VM records are created but no
         recovery_instance thread is started here; when False, reserve host
         selection is performed
        """

        try:
            db_engine = dbapi.get_engine()
            session = dbapi.get_session(db_engine)
            conf_dict = self.rc_config.get_value("recover_starter")
            recovery_max_retry_cnt = conf_dict.get("recovery_max_retry_cnt")
            recovery_retry_interval = conf_dict.get("recovery_retry_interval")

            vm_list = self.rc_util_api.fetch_servers_on_hypervisor(notification_hostname)

            # Count vm_list
            if len(vm_list) == 0:
                self.rc_util.syslogout_ex("RecoveryControllerStarter_0014", syslog.LOG_INFO)
                msg = "There is no instance in " + notification_hostname + "."
                self.rc_util.syslogout(msg, syslog.LOG_INFO)

                # update record in notification_list
                self.rc_util_db.update_notification_list_db(session, "progress", 2, notification_id)

                return

            result = dbapi.get_all_notification_list_by_id_for_update(session, notification_id)
            recover_to = result.pop().recover_to

            if retry_mode is False:
                cnt = dbapi.get_all_reserve_list_by_hostname_not_deleted(session, recover_to)

                if not cnt:
                    # The recorded recover_to host is not an active reserve;
                    # pick another one by cluster port.
                    cnt = dbapi.get_one_reserve_list_by_cluster_port_for_update(
                        session, notification_cluster_port, notification_hostname
                    )

                    if not cnt:
                        self.rc_util.syslogout_ex("RecoveryControllerStarter_0022", syslog.LOG_WARNING)
                        msg = "The reserve node not exist in reserve_list DB, so do not recover instances."
                        self.rc_util.syslogout(msg, syslog.LOG_WARNING)
                        # The session must be passed first, as in the other
                        # update_notification_list_db calls in this method.
                        self.rc_util_db.update_notification_list_db(session, "progress", 3, notification_id)
                        return

                    result = cnt.pop()
                    recover_to = result.hostname
                    update_at = datetime.datetime.now()
                    dbapi.update_notification_list_by_notification_id_recover_to(
                        session, notification_id, update_at, recover_to
                    )

                    self.rc_util.syslogout_ex("RecoveryControllerStarter_0024", syslog.LOG_INFO)

            self.rc_util.syslogout_ex("RecoveryControllerStarter_0015", syslog.LOG_INFO)

            # The reserve host is consumed by this recovery.
            delete_at = datetime.datetime.now()
            dbapi.update_reserve_list_by_hostname_as_deleted(session, recover_to, delete_at)

            # create semaphore (Multiplicity is get from config.)
            sem_recovery_instance = threading.Semaphore(int(conf_dict.get("semaphore_multiplicity")))

            # Pre-bound so the follow-up loop is safe when the retry count is 0.
            incomplete_list = []
            for i in range(0, int(recovery_max_retry_cnt)):
                incomplete_list = []

                for vm_uuid in vm_list:
                    primary_id = self._create_vm_list_db_for_failed_host(session, notification_id, vm_uuid)

                    if primary_id:
                        if retry_mode == True:
                            # Skip recovery_instance thread. Will delegate to
                            # ...
                            msg = (
                                "RETRY MODE. Skip recovery_instance thread"
                                + " vm_uuide="
                                + vm_uuid
                                + " notification_id="
                                + notification_id
                            )
                            self.rc_util.syslogout(msg, syslog.LOG_INFO)
                        else:
                            msg = (
                                "Run thread rc_worker.recovery_instance."
                                + " vm_uuid="
                                + vm_uuid
                                + " primary_id="
                                + str(primary_id)
                            )
                            self.rc_util.syslogout(msg, syslog.LOG_INFO)

                            threading.Thread(
                                target=self.rc_worker.recovery_instance,
                                args=(vm_uuid, primary_id, sem_recovery_instance),
                            ).start()
                    else:
                        if retry_mode == True:
                            continue
                        else:
                            # No record was created; try again next round.
                            incomplete_list.append(vm_uuid)

                if incomplete_list:
                    vm_list = incomplete_list
                    greenthread.sleep(int(recovery_retry_interval))
                else:
                    break

            # VMs that could not be registered after all retries are
            # inserted directly and recovered anyway.
            for vm_uuid in incomplete_list:
                primary_id = self.rc_util_db.insert_vm_list_db(session, notification_id, vm_uuid, 0)

                # Skip recovery_instance thread. Will delegate to ...
                self.rc_util.syslogout_ex("RecoveryControllerStarter_0031", syslog.LOG_INFO)
                msg = (
                    "Run thread rc_worker.recovery_instance." + " vm_uuid=" + vm_uuid + " primary_id=" + str(primary_id)
                )
                self.rc_util.syslogout(msg, syslog.LOG_INFO)
                threading.Thread(
                    target=self.rc_worker.recovery_instance, args=(vm_uuid, primary_id, sem_recovery_instance)
                ).start()

            # update record in notification_list
            self.rc_util_db.update_notification_list_db(session, "progress", 2, notification_id)

            return

        except Exception as e:
            # KeyError keeps its own message ID; everything else is reported
            # under the generic one. SystemExit, KeyboardInterrupt and
            # GreenletExit are intentionally allowed to propagate.
            if isinstance(e, KeyError):
                msg_id = "RecoveryControllerStarter_0017"
            else:
                msg_id = "RecoveryControllerStarter_0018"
            self.rc_util.syslogout_ex(msg_id, syslog.LOG_ERR)
            error_type, error_value, traceback_ = sys.exc_info()
            tb_list = traceback.format_tb(traceback_)
            self.rc_util.syslogout(error_type, syslog.LOG_ERR)
            self.rc_util.syslogout(error_value, syslog.LOG_ERR)
            for tb in tb_list:
                self.rc_util.syslogout(tb, syslog.LOG_ERR)
            return