コード例 #1
0
    def get(self):
        """Report whether MySQL connection usage is below 70% of the limit.

        Writes "true" when this node is not being monitored, or when the
        number of process-list rows is below 70% of ``max_connections``.
        Writes "false" when no MySQL connection can be obtained or when the
        usage ratio is at or above 70%.
        """
        zkOper = self.retrieve_zkOper()
        if not is_monitoring(get_localhost_ip(), zkOper):
            self.finish("true")
            return

        conn = self.dba_opers.get_mysql_connection()
        if conn is None:
            self.finish("false")
            return

        try:
            current_connections_rows = self.dba_opers.show_processlist(conn)
            max_connections_rows = self.dba_opers.show_variables(
                conn, 'max_connections')
        finally:
            conn.close()

        current_connections_count = len(current_connections_rows)
        max_connections = int(
            dict(max_connections_rows).get("max_connections"))

        # Force true division: with two ints Python 2 floors the quotient to
        # 0 for every count below the limit, so the 70% threshold would only
        # ever trip once the server is completely full.
        usage_ratio = float(current_connections_count) / max_connections
        if usage_ratio < 0.7:
            self.finish("true")
            return

        self.finish("false")
コード例 #2
0
    def sync_info_from_zk(self, node_ip_addr):
        """Copy node and cluster properties from ZooKeeper into local config.

        Does nothing when ZooKeeper reports that no cluster exists. The
        ``node_ip_addr`` argument is replaced by the local host IP before it
        is used. The ZooKeeper client created here is stopped on every exit
        path.
        """
        zk_client = ZkOpers()

        try:
            if not zk_client.existCluster():
                return

            cluster_uuid = zk_client.getClusterUUID()
            cluster_raw, _ = zk_client.retrieveClusterProp(cluster_uuid)

            # The caller-supplied address is discarded in favour of the
            # locally detected one.
            node_ip_addr = get_localhost_ip()
            assert node_ip_addr
            node_info = zk_client.retrieve_data_node_info(node_ip_addr)

            # The stored cluster property uses single quotes; convert them so
            # the text parses as JSON.
            cluster_info = json.loads(cluster_raw.replace("'", "\""))

            if type(node_info) is not dict or type(cluster_info) is not dict:
                logging.info("write data into configuration failed")
                return

            conf_writer = ConfigFileOpers()
            conf_writer.setValue(options.data_node_property, node_info)
            conf_writer.setValue(options.cluster_property, cluster_info)
            logging.debug(
                "program has re-written zk data into configuration file"
            )
        finally:
            zk_client.stop()
コード例 #3
0
 def _send_monitor_email(self, anti_item_content):
     """Mail the anti-item report, tagged with this host's IP, to the admins.

     The mail is only sent when ``options.send_email_switch`` is truthy.
     """
     host_ip = get_localhost_ip()
     mail_subject = "[%s] Auti-Item existed in MySQL according to Galera way" % options.sitename
     mail_body = "\nip:".join((anti_item_content, host_ip))
     if not options.send_email_switch:
         return
     send_email(options.admins, mail_subject, mail_body)
コード例 #4
0
    def _send_log_info_email(self, subject, content):
        """Mail ``content`` plus this host's IP to the admins under ``subject``.

        The mail is only sent when ``options.send_email_switch`` is truthy.
        """
        host_ip = get_localhost_ip()
        mail_body = "\nip:".join((content, host_ip))

        if not options.send_email_switch:
            return
        send_email(options.admins, subject, mail_body)
コード例 #5
0
    def run(self):
        """Run one backup cycle after taking the ZooKeeper backup lock.

        Steps, in order:
          1. Take the backup lock; return immediately if it is not granted.
          2. Connect to MySQL and read the server status; abort with an error
             record unless the inspected status row reads 'Synced'.
          3. Abort with an error record when the /data partition check
             command returns '0'.
          4. Create the backup directory, purge expired backups, run the
             backup and transfer it, then publish a success record.

        Any exception raised after the lock is taken is logged and replaced
        by a generic error record written to ZooKeeper.
        """
        # NOTE(review): `lock` is never released in this method -- confirm
        # that the release happens elsewhere.
        isLock, lock = self.zkOpers.lock_backup_action()
        if not isLock:
            logging.info('zk is not lock')
            return

        def _abort(reason):
            # Stamp the reason with the current time, then persist it both
            # to the local record file and to ZooKeeper.
            self.backup_record['error: '] = '%s %s' % (
                reason, datetime.datetime.now().strftime(TIME_FORMAT))
            self.backupOpers._write_info_to_local(
                self.backupOpers.path, self.backupOpers.file_name,
                self.backup_record)
            self.zkOpers.write_backup_backup_info(self.backup_record)

        try:
            _password = retrieve_monitor_password()
            conn = self.dba_opers.get_mysql_connection(user="******",
                                                       passwd=_password)
            if conn is None:
                raise UserVisiableException("Can\'t connect to mysql server")

            # The connection is only needed for the status query, so release
            # it straight away instead of leaking it for the whole backup.
            try:
                db_status = self.dba_opers.show_status(conn)
            finally:
                conn.close()

            # NOTE(review): the fixed index -14 presumably selects the wsrep
            # state row of the status result -- confirm it is stable.
            if 'Synced' != db_status[-14][1]:
                _abort('Mcluster is not start')
                return

            if '0' == self.__run_comm(CHECK_DMP_DATA_CMD):
                _abort('No have /data partition')
                return

            self.backupOpers.create_backup_directory()
            self.backupOpers.remove_expired_backup_file()

            self.backupOpers.backup_action(self.zkOpers)
            self.backupOpers.trans_backup_file(self.zkOpers)

            record = {
                "recently_backup_ip: ": str(get_localhost_ip()),
                'time: ': datetime.datetime.now().strftime(TIME_FORMAT),
                'backup_type: ': self._backup_mode
            }
            self.zkOpers.write_backup_backup_info(record)

        except Exception as e:
            # NOTE(review): this record uses the key 'time:' while the
            # success record uses 'time: ' (trailing space); kept unchanged
            # because readers of the record are not visible here.
            record = {
                "error: ": 'backup is wrong, please check it!',
                'time:': datetime.datetime.now().strftime(TIME_FORMAT),
                'backup_type: ': self._backup_mode
            }
            self.zkOpers.write_backup_backup_info(record)
            logging.error(e, exc_info=True)
コード例 #6
0
 def _send_error_email(self, exception):
     """Mail an internal-server-error report to the admins.

     The body is ``exception`` followed by the application name/version and
     this host's IP. The mail is only sent when
     ``options.send_email_switch`` is truthy. Any failure while building or
     sending the mail is logged and never propagated.

     :param exception: error text or exception object to report.
     """
     try:
         subject = "[%s]Internal Server Error" % options.sitename
         host_ip = get_localhost_ip()
         version_str = '{0}-{1}'.format(__app__, __version__)
         # Format instead of concatenating: `exception + "..."` raises
         # TypeError when an exception object (not a string) is passed, and
         # that error would be swallowed below without any mail being sent.
         body = "%s\n%s\nhost ip :%s" % (exception, version_str, host_ip)
         if options.send_email_switch:
             send_email(options.admins, subject, body)
     except Exception:
         logging.error(traceback.format_exc())
コード例 #7
0
ファイル: base.py プロジェクト: mad3310/galera-manager
    def _send_error_email(self, exception):
        """Mail an internal-server-error report to the admins.

        The body holds ``exception``, the application name/version and this
        host's IP. The mail is only sent when ``options.send_email_switch``
        is truthy; any failure here is logged and never propagated.
        """
        try:
            host_ip = get_localhost_ip()
            app_version = '{0}-{1}'.format(__app__, __version__)
            logging.info("version_str :" + str(app_version))

            mail_subject = "[%s]Internal Server Error " % options.sitename
            mail_body = "{0}\n{1}\nip:{2}".format(
                exception, app_version, host_ip)

            if not options.send_email_switch:
                return
            send_email(options.admins, mail_subject, mail_body)
        except Exception:
            logging.error(traceback.format_exc())
コード例 #8
0
    def trans_backup_file(self, zkOpers):
        """Transfer the incremental backup with rsync and record the progress.

        Progress is appended to the local record file and published through
        ``zkOpers.write_backup_innerbackup_info``. On rsync success the local
        incremental backup directory is removed and old log files under
        ``<LOG_FILE_PATH>/incr`` are pruned; on failure the method returns
        right after recording it.
        """
        started_at = datetime.datetime.now().strftime(TIME_FORMAT)
        start_line = '%s  == cp incr_backup_file is starting  == ' % started_at

        self.status['cp_incr_file_status:'] = Status.backup_transmit_starting
        self.status['cp_incr_file_start_time:'] = started_at
        self._write_info_to_local(self.path, self.file_name, start_line)
        zkOpers.write_backup_innerbackup_info(self.status)

        exit_code = os.system(RSYNC % (self.time,
                                       BACKUP_CONFIG.INCR_LOCAL_DIR,
                                       BACKUP_CONFIG.INCR_REMOTE_DIR))
        finished_at = datetime.datetime.now().strftime(TIME_FORMAT)

        if exit_code != 0:
            # NOTE(review): these keys ('cp_incr_status:',
            # 'cp_incr_finish_time:') differ from the 'cp_incr_file_...' keys
            # used on start and success -- confirm whether that is intended.
            failure_line = (
                '%s  == incr_backup_file is not cp /data == ' % finished_at)
            self.status['cp_incr_status:'] = Status.backup_transmit_faild
            self.status['cp_incr_finish_time:'] = finished_at
            self._write_info_to_local(self.path, self.file_name, failure_line)
            zkOpers.write_backup_innerbackup_info(self.status)
            return

        # The transfer succeeded, so the local copy is no longer needed.
        cleanup_cmd = ''.join(['rm -rf ', BACKUP_CONFIG.INCR_LOCAL_DIR,
                               '/incre_backup-', self.time])
        self._run_comm_call(cleanup_cmd)

        success_line = '%s  == cp incr_backup_file ok  == ' % finished_at
        self.status['cp_incr_file_status:'] = Status.backup_transmit_succeed
        self.status['incr_backup_ip:'] = str(get_localhost_ip())
        self.status['cp_incr_file_finish_time:'] = finished_at
        self._write_info_to_local(self.path, self.file_name, success_line)
        zkOpers.write_backup_innerbackup_info(self.status)

        done_line = '%s  == the incr backup is completed == ' % finished_at
        self._write_info_to_local(self.path, self.file_name, done_line)

        self._delete_file(BACKUP_CONFIG.LOG_FILE_PATH + '/incr', days_count=8)
コード例 #9
0
    def get(self):
        """Report the wsrep status of the local node as "true" or "false".

        Writes "true" when this node is not being monitored or when the
        wsrep status check result is truthy, "false" otherwise.

        :raises HTTPAPIErrorException: with status code 417 when the status
            cannot be retrieved.
        """
        zkOper = self.retrieve_zkOper()

        if not is_monitoring(get_localhost_ip(), zkOper):
            self.finish("true")
            return

        try:
            check_result = self.dba_opers.retrieve_wsrep_status()
        except Exception:
            # Catch Exception rather than using a bare `except:` so that
            # SystemExit/KeyboardInterrupt are not converted into an HTTP
            # error, and log the cause before it is replaced.
            logging.error("retrieve wsrep status failed", exc_info=True)
            raise HTTPAPIErrorException("connection break down",
                                        status_code=417)

        # Lazy logging arguments: a tuple result cannot break the formatting.
        logging.info("check_wsrepstatus : %s", check_result)

        self.finish("true" if check_result else "false")
コード例 #10
0
    def trans_backup_file(self, ZkOpers):
        """Transfer the full backup with rsync and record the progress.

        Progress is appended to the local record file and published through
        ``ZkOpers.write_backup_fullbackup_info``. On rsync success the backup
        index is updated and old log files under ``LOG_FILE_PATH`` are
        pruned; on failure the method returns right after recording it.
        """
        started_at = datetime.datetime.now().strftime(TIME_FORMAT)
        start_line = '%s  == cp backup_file is starting  == ' % started_at

        # NOTE(review): 'cp_file_status:' is only written here; the outcome
        # is stored under 'cp_status:' below -- confirm that is intended.
        self.status['cp_file_status:'] = Status.backup_transmit_starting
        self.status['cp_file_start_time:'] = started_at
        self._write_info_to_local(self.path, self.file_name, start_line)
        ZkOpers.write_backup_fullbackup_info(self.status)

        exit_code = os.system(RSYNC % (self.time,
                                       BACKUP_CONFIG.FULL_LOCAL_DIR,
                                       BACKUP_CONFIG.FULL_REMOTE_DIR))
        finished_at = datetime.datetime.now().strftime(TIME_FORMAT)

        if exit_code != 0:
            failure_line = (
                '%s  == backup_file is not cp /data == ' % finished_at)
            self.status['cp_status:'] = Status.backup_transmit_faild
            self.status['backup_ip:'] = None
            self.status['cp_finish_time:'] = finished_at
            self._write_info_to_local(self.path, self.file_name, failure_line)
            ZkOpers.write_backup_fullbackup_info(self.status)
            return

        self._fb_update_index('/full_backup-' + self.time)

        success_line = '%s  == Cp backup_file ok == ' % finished_at
        self.status['cp_status:'] = Status.backup_transmit_succeed
        self.status['full_backup_ip:'] = str(get_localhost_ip())
        self.status['cp_finish_time:'] = finished_at
        self._write_info_to_local(self.path, self.file_name, success_line)
        ZkOpers.write_backup_fullbackup_info(self.status)

        done_line = '%s  == the full backup is completed == ' % finished_at
        self._write_info_to_local(self.path, self.file_name, done_line)

        self._delete_file(BACKUP_CONFIG.LOG_FILE_PATH)
コード例 #11
0
    def check(self, data_node_info_list):
        """Check the local database for anti-items and publish the result.

        Returns without doing anything when this node is not being
        monitored. Otherwise the previous failure count is read back from
        the stored monitor status message, the anti-item check is run (or a
        connection failure is counted), and the outcome is written through
        the parent class's ``write_status`` and ``write_status_to_es``.

        :param data_node_info_list: not used by this method.
        """
        zkOper = Scheduler_ZkOpers()
        if not is_monitoring(get_localhost_ip(), zkOper):
            return

        monitor_type, monitor_key = "db", "existed_db_anti_item"
        error_record = {}
        anti_item_count, msg, failed_count = 0, "", 0

        # Read the previous status before opening the MySQL connection, so a
        # failing ZooKeeper read cannot leak an open connection.
        _path_value = zkOper.retrieve_monitor_status_value(
            monitor_type, monitor_key)
        if _path_value:
            # `\d+` keeps every digit: a single `\d` truncated counts of 10
            # or more to their first digit. A message without the marker
            # leaves the count at 0 instead of raising IndexError.
            found = re.search(r'failed count=(\d+)',
                              _path_value.get('message') or '')
            if found:
                failed_count = int(found.group(1))

        conn = self.dba_opers.get_mysql_connection()
        if conn is None:
            failed_count += 1
            # Only raise the alarm after more than four recorded failures.
            if failed_count > 4:
                anti_item_count = 500
                error_record.setdefault("msg", "no way to connect to db")
        else:
            try:
                anti_item_count, msg, anti_item_detail = self._anti_item_check(
                    conn)
            finally:
                conn.close()
            if anti_item_count > 0:
                error_record.setdefault(
                    "msg",
                    "mcluster existed on %s please check which db right now." %
                    (msg))
                error_record.setdefault("detail", anti_item_detail)
                logging.info(error_record)

        alarm_level = self.retrieve_alarm_level(anti_item_count, 0, 0)
        logging.info("existed anti_item alarm_level :%s" % (alarm_level))
        super(Check_DB_Anti_Item,
              self).write_status(anti_item_count, 0, failed_count, alarm_level,
                                 error_record, monitor_type, monitor_key)
        super(Check_DB_Anti_Item,
              self).write_status_to_es(anti_item_count, 0, failed_count,
                                       alarm_level, error_record, monitor_type,
                                       monitor_key)
コード例 #12
0
    def bin_log_node_stat(self):
        """Return the local ``log_bin`` setting and the other started nodes.

        :returns: dict with ``node_list`` (started nodes from ZooKeeper with
            this host's IP removed) and ``stat_log_bin`` (value column of the
            first row returned by ``show variables like 'log_bin'``).
        :raises UserVisiableException: when no MySQL connection is available.
        """
        conn = self.dba_opers.get_mysql_connection()
        if conn is None:
            raise UserVisiableException("Can\'t connect to mysql server")

        try:
            cur = conn.cursor()
            cur.execute("show variables like 'log_bin'")
            stat_log_bin = cur.fetchall()[0][1]
        finally:
            conn.close()

        peers = self.retrieve_zkOper().retrieve_started_nodes()
        own_ip = get_localhost_ip()
        if own_ip in peers:
            peers.remove(own_ip)

        return {'node_list': peers, 'stat_log_bin': stat_log_bin}
コード例 #13
0
    def get(self):
        """Report whether every DB user is within 80% of its connection cap.

        Writes "true" when this node is not being monitored, or when no user
        of the first database listed in ZooKeeper has more current
        connections than 80% of its maximum. Writes "false" when no MySQL
        connection can be obtained or as soon as one user exceeds the
        threshold.
        """
        zkOper = self.retrieve_zkOper()
        if not is_monitoring(get_localhost_ip(), zkOper):
            self.finish("true")
            return

        conn = self.dba_opers.get_mysql_connection()
        if conn is None:
            self.finish("false")
            return

        # Everything after the connection is opened runs inside the try so
        # that a failing ZooKeeper lookup cannot leak the connection.
        try:
            # @todo: dbs[0] need to refactor
            clusterUUID = zkOper.getClusterUUID()

            dbs = zkOper.retrieve_db_list()
            # Truthiness also covers a None result, not just an empty list.
            db_name = dbs[0] if dbs else None

            user_prop_dict = {}
            if db_name is not None:
                user_prop_dict = zkOper.retrieve_db_user_prop(
                    clusterUUID, db_name) or {}

            for user_prop in user_prop_dict:
                max_user_connections_rows = self.dba_opers.show_user_max_conn(
                    conn, user_prop, user_prop_dict[user_prop])
                current_user_connections_rows = \
                    self.dba_opers.show_user_current_conn(
                        conn, user_prop, user_prop_dict[user_prop])
                if int(current_user_connections_rows
                       ) > int(max_user_connections_rows) * 0.8:
                    self.finish("false")
                    return
        finally:
            conn.close()

        self.finish("true")
コード例 #14
0
    def check(self, data_node_info_list):
        """Compare database users in MySQL against ZooKeeper and publish it.

        Returns without doing anything when this node is not being
        monitored. Otherwise the two user sources are compared via
        ``compare_center`` and the counts, alarm level and differences are
        written through the parent class's ``write_status`` and
        ``write_status_to_es``.

        :param data_node_info_list: not used by this method.
        """
        zk_client = Scheduler_ZkOpers()
        if not is_monitoring(get_localhost_ip(), zk_client):
            return

        monitor_type = "db"
        monitor_key = "dbuser"
        user_mysql_src_dict, user_zk_src_list = self._get_check_user_list()

        error_record = {}
        differences = {}
        counters = {"total": 0, "failed": 0, "success": 0}

        if len(user_zk_src_list) or len(user_mysql_src_dict):
            self.compare_center(user_mysql_src_dict, user_zk_src_list,
                                differences, counters)
            counters["total"] = counters["success"] + counters["failed"]
        else:
            # Neither source knows any user: record that explicitly.
            error_record["msg"] = "no database users in zk neither in mysql"
            differences["Empty"] = ""

        total_count = counters["total"]
        success_count = counters["success"]
        failed_count = counters["failed"]
        alarm_level = self.retrieve_alarm_level(total_count, success_count,
                                                failed_count)

        if differences:
            error_record["dif"] = differences

        parent = super(Check_Database_User, self)
        parent.write_status(total_count, success_count, failed_count,
                            alarm_level, error_record, monitor_type,
                            monitor_key)
        parent.write_status_to_es(total_count, success_count, failed_count,
                                  alarm_level, error_record, monitor_type,
                                  monitor_key)
コード例 #15
0
    def get(self):
        """Register this node as started and write a heartbeat row to MySQL.

        Visible flow:
          * Read ``dataNodeIp`` from the data-node property file.
          * Without a MySQL connection: remove the node from the started
            list in ZooKeeper (if present), run ``options.kill_innotop`` and
            write "false".
          * With a connection: mark the node as started; write "true" and
            stop when the node is not being monitored.
          * Otherwise make sure four per-host tables exist in the ``monitor``
            database, occasionally empty one of them, insert the current
            time into the table for the current 6-hour slot and read it back.
          * Any exception is logged and answered with "false".

        NOTE(review): within the lines shown, the success path never calls
        ``self.finish`` and ``record_time`` is unused -- confirm whether
        further handling follows the ``try`` block. ``conn`` is also not
        closed in the visible code.
        """
        conn = self.dba_opers.get_mysql_connection()
        try:
            dataNodeProKeyValue = self.confOpers.getValue(
                options.data_node_property, ['dataNodeIp'])
            data_node_ip = dataNodeProKeyValue['dataNodeIp']

            zkOper = self.retrieve_zkOper()
            started_ip_list = zkOper.retrieve_started_nodes()
            # The host name is used both as the row identifier and as part of
            # the table name below.
            identifier = socket.gethostname()
            '''
            @todo: review the comment code for arbitrator way
            '''
            #           ret_dict = self.confOpers.getValue(options.data_node_property, ['dataNodeName','dataNodeIp'])
            #           node_name = ret_dict['dataNodeName']
            #           obj = re.search("-n-2", node_name)
            #           if obj != None:
            #               self.finish("true")
            #               return

            # No connection: take the node out of the started list, run the
            # configured kill_innotop command and report failure.
            if conn is None:
                if data_node_ip in started_ip_list:
                    zkOper.remove_started_node(data_node_ip)
                    self.invokeCommand.run_check_shell(options.kill_innotop)
                self.finish("false")
                return

            zkOper.write_started_node(data_node_ip)

            if not is_monitoring(get_localhost_ip(), zkOper):
                self.finish("true")
                return

            dbName = 'monitor'
            n_time = datetime.datetime.now()

            h = n_time.hour
            min = n_time.minute
            # Slot index of the current 6-hour window. This relies on `/`
            # being floor division for ints (Python 2 semantics, matching the
            # `except Exception, e` syntax below); under true division the
            # value would be a float.
            offset = h / 6

            tbName = ''

            # Table names have the form 'tem<hostname with - replaced by _>_<i>'.
            prefix_tb_name = 'tem'
            mid_tb_name = str(identifier)
            mid_tb_name_rps = mid_tb_name.replace("-", "_")
            pre_tbname = prefix_tb_name + mid_tb_name_rps
            # Make sure all four slot tables exist.
            for i in range(4):
                tbName = pre_tbname + "_" + str(i)
                self.dba_opers.check_create_table(conn, tbName, dbName)

            tbName = pre_tbname + "_" + str(offset)

            del_tbName = ''
            ft = float(time.time())
            # Cleanup only runs during hours 0, 6, 12 and 18, and only when
            # the scaled timestamp happens to be a multiple of 10;
            # `min <= 59` holds for every valid minute value. The table
            # emptied is the one two slots away from the current slot.
            if h % 6 == 0 and min <= 59 and (1000000 * ft) % 10 == 0:
                int_tbName = (offset + 2) % 4
                del_tbName = "%s_%s" % (pre_tbname, int_tbName)
                self.dba_opers.delete_tb_contents(conn, del_tbName, dbName)
                logging.info(
                    'delete the contents in database (%s) before 12 hours success!'
                    % (del_tbName))

            # Write the heartbeat row for the current slot, then read it back.
            str_time = n_time.strftime(TIME_FORMAT)
            self.dba_opers.insert_record_time(conn, str_time, identifier,
                                              tbName, dbName)
            logging.info('Insert time %s into table %s ' % (str_time, tbName))

            record_time = self.dba_opers.query_record_time(
                conn, identifier, tbName, dbName)

        except Exception, e:
            # Any failure above is reported to the caller as "false".
            return_flag = 'false'
            logging.error(e)
            self.finish(return_flag)
            return