def matching_analysis(log_file, log_level, warning_interval, matching_dict, notify_dict):
    """For each watched file, check the newest keyword-match record and notify.

    matching_dict maps a watched file path to the keyword recorded for it;
    a notification is sent when the latest matched line is not 'Nothing'.
    """
    logger = log.Logger(log_file, log_level)
    db = database.db()
    for matching_file in matching_dict:
        matching_key = matching_dict[matching_file]
        sql = f"select record_time, matching_context from matching \
            where record_time=( \
            select max(record_time) from matching \
            where matching_context!=? and matching_file=? and matching_key=?\
            )"
        data = db.query_one(sql, ("all", matching_file, matching_key))
        logger.logger.debug("分析匹配...")
        # Guard clauses: nothing recorded, or the sentinel 'Nothing' match.
        if data is None:
            continue
        record_time, context = data
        if context == 'Nothing':
            continue
        warning_msg = f"\"{matching_file}\"文件中\"{context.strip()}\"行存在关键字\"{matching_key}\""
        msg = f"{matching_file}_{matching_key}"
        if warning.non_remedial_warning(logger, db, "matching", msg, warning_msg,
                                        record_time, warning_interval):
            notification.send(logger, f"日志分析预警:\n{warning_msg}\n", notify_dict, msg=msg)
def running_analysis(log_file, log_level, warning_interval, sender_alias, receive, subject):
    """Check whether each recorded Tomcat instance is running; mail if not."""
    logger = log.Logger(log_file, log_level)
    logger.logger.debug("开始分析Tomcat运行情况...")
    db = database.db()
    # NOTE: old sqlite3 builds cannot do multi-column IN queries, so only the
    # rows sharing the latest record_time are selected.
    sql = "select port, pid from tomcat_constant where record_time=(select max(record_time) from tomcat_constant)"
    for port, pid in db.query_all(sql):
        flag = 1 if pid == 0 else 0
        if warning.warning(logger, db, flag, port, "running", warning_interval):
            mail.send(logger, f"Tomcat预警:\nTomcat({port})未运行\n",
                      sender_alias, receive, subject, msg=f'tomcat{port}_running')
def master_slave_analysis(log_file, log_level, seconds_behind_master, warning_interval, sender_alias, receive, subject):
    """Analyse MySQL replication health on a slave and mail warnings.

    Two independent warning channels are tracked:
      * slave_conn  — IO/SQL threads not both "Yes"
      * slave_delay — Seconds_Behind_Master >= seconds_behind_master
    """
    logger = log.Logger(log_file, log_level)
    db = database.db()
    # Join slave status with the recorded role at the same record_time.
    sql = "select role, slave_io_thread, slave_sql_thread, seconds_behind_master, slave_io_state, slave_sql_state from mysql_slave, mysql_role where mysql_role.record_time=mysql_slave.record_time"
    data = db.query_one(sql)
    conn_msg = "slave_conn"
    delay_msg = "slave_delay"
    # Only analyse when this node is actually a slave.
    if data is not None and data[0] == "slave":
        logger.logger.debug("开始分析MySQL主从信息")
        if data[1].lower() == data[2].lower() == "yes":
            conn_flag = 0
            delay_flag = 1 if data[3] >= seconds_behind_master else 0
        else:
            conn_flag = 1
            # Delay is meaningless while replication is broken; None skips it.
            delay_flag = None
        for flag, msg in [(conn_flag, conn_msg), (delay_flag, delay_msg)]:
            if flag is not None:
                warning_flag = warning.warning(logger, db, flag, "mysql", msg, warning_interval)
                if warning_flag:
                    # Same detailed body is sent for both channels.
                    warning_msg="MySQL预警:\n"\
                            "MySQL主从连接:\n"\
                            f"Slave_IO_Running: {data[1]}\n"\
                            f"Slave_SQL_Running: {data[2]}\n"\
                            f"Slave_IO_State: {data[4]}\n"\
                            f"Slave_SQL_Running_State: {data[5]}\n"\
                            f"Seconds_Behind_Master: {data[3]}"
                    mail.send(logger, warning_msg, sender_alias, receive, subject, msg=msg)
def master_slave_analysis(log_file, log_level, warning_interval, sender_alias, receive, subject):
    """Analyse the Redis slave's link to its master and mail a warning.

    Bug fix: the original built the warning message from data[2]
    unconditionally; when no slave record exists (data is None) and the
    warning machinery still reported True, that raised a TypeError.
    """
    logger = log.Logger(log_file, log_level)
    db = database.db()
    sql = "select a.role, a.master_link_status, a.master_host from redis_slave as a,redis_role as b where a.record_time=b.record_time and a.role=b.role"
    data = db.query_one(sql)
    if data is not None:
        logger.logger.debug("开始分析Redis主从信息")
        flag = 0 if data[1] in ("up", "online") else 1
    else:
        # No slave record: keep calling warning() with flag=0 so any previous
        # warning state can be cleared, as the original did.
        flag = 0
    warning_flag = warning.warning(logger, db, flag, "redis", "slave", warning_interval)
    if warning_flag:
        master_host = data[2] if data is not None else "unknown"
        warning_msg = f"Redis预警:\nRedis slave无法连接master({master_host})\n"
        mail.send(logger, warning_msg, sender_alias, receive, subject, msg='redis_slave')
def jvm_analysis(log_file, log_level, warning_interval, sender_alias, receive, subject):
    """Analyse Tomcat GC statistics; mail when average YGC/FGC time is high."""
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.debug("开始分析Jvm内存情况...")
    java_version = db.query_one("select version from tomcat_java_version")[0]
    table_name = f"tomcat_jstat{java_version}"
    sql = f"select port, ygc, ygct, fgc, fgct from {table_name} where record_time=(select max(record_time) from {table_name})"
    ygc_warning_time = 1
    fgc_warning_time = 10
    for port, ygc, ygct, fgc, fgct in db.query_all(sql):
        # Average seconds per collection; 0 when no collections yet.
        checks = (
            ("ygc", "YGC", ygct / ygc if ygc else 0, ygc_warning_time),
            ("fgc", "FGC", fgct / fgc if fgc else 0, fgc_warning_time),
        )
        for key, label, avg_time, threshold in checks:
            flag = 0
            if avg_time >= threshold:
                flag = 1
                logger.logger.warning(f"Tomcat({port})的{label}平均时间: {avg_time}")
            if warning.warning(logger, db, flag, port, key, warning_interval):
                warning_msg = f"Tomcat预警:\nTomcat({port}){label}平均时间为{avg_time}\n"
                mail.send(logger, warning_msg, sender_alias, receive, subject,
                          msg=f'tomcat{port}_{key}')
def cpu_record(log_file, log_level):
    """Sample CPU core count and 5-second average utilisation into `cpu`."""
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.debug("记录cpu信息...")
    record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    row = (record_time, psutil.cpu_count(), psutil.cpu_percent(interval=5))
    db.update_one("insert into cpu values(?, ?, ?)", row)
def swap_record(log_file, log_level):
    """Snapshot swap usage (total/used/percent/free) into the `swap` table."""
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.debug("记录交换分区信息...")
    record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    swap = psutil.swap_memory()
    # psutil.swap_memory() tuple order: (total, used, free, percent, ...)
    row = (record_time, swap[0], swap[1], swap[3], swap[2])
    db.update_one("insert into swap values(?, ?, ?, ?, ?)", row)
def memory_record(log_file, log_level):
    """Snapshot virtual-memory usage into the `memory` table."""
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.debug("记录内存信息...")
    record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    mem = psutil.virtual_memory()
    # psutil.virtual_memory() tuple order: (total, available, percent, used, free, ...)
    row = (record_time, mem[0], mem[1], mem[3], mem[2], mem[4])
    db.update_one("insert into memory values(?, ?, ?, ?, ?, ?)", row)
def running_analysis(log_file, log_level, warning_interval, notify_dict):
    """Check whether Redis is running; send a notification when it is not."""
    logger = log.Logger(log_file, log_level)
    logger.logger.debug("开始分析Redis运行情况...")
    db = database.db()
    sql = "select port, pid from redis_constant where record_time=(select max(record_time) from redis_constant)"
    port, pid = db.query_one(sql)
    if pid == 0:
        flag = 1
    else:
        flag = 0
    if warning.warning(logger, db, flag, "redis", "running", warning_interval):
        notification.send(logger, f"Redis预警:\nRedis({port})未运行",
                          notify_dict, msg='redis_running')
def analysis(log_file, log_level, warning_interval, sender_alias, receive, subject):
    """Analyse recorded per-user ulimits (nofile/nproc) and mail warnings.

    Bug fix: the original left warning_msg unbound when the limit was
    acceptable (or non-numeric) yet still passed it to mail.send() whenever
    warning.warning() returned True, risking a NameError. The duplicated
    nofile/nproc logic is factored into _check_user_limit.
    """
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.debug(f"分析用户的资源限制...")
    sql = "select user, nofile, nproc from users_limit where record_time=(select max(record_time) from users_limit)"
    data = db.query_all(sql)
    min_limit = 5000
    for user, nofile, nproc in data:
        # Original warning keys kept for backward compatibility:
        # nofile used "<user>_limit", nproc used "<user>_nproc_limit".
        _check_user_limit(logger, db, user, "nofile", nofile, min_limit,
                          f"{user}_limit", warning_interval,
                          sender_alias, receive, subject)
        _check_user_limit(logger, db, user, "nproc", nproc, min_limit,
                          f"{user}_nproc_limit", warning_interval,
                          sender_alias, receive, subject)


def _check_user_limit(logger, db, user, arg, value, min_limit, warning_key,
                      warning_interval, sender_alias, receive, subject):
    """Warn once if a single ulimit value is numeric and below min_limit."""
    flag = 0
    warning_msg = None
    if value.isdigit() and int(value) < min_limit:
        flag = 1
        cmd = f"echo '{user} - {arg} 65536' >> /etc/security/limits.conf"
        warning_msg = (f"用户资源限制预警:\n"
                       f"用户({user})的{arg}参数值({value})过低.\n"
                       f"请在root用户下执行命令: {cmd}, 然后重启登录该用户再重启该用户下相应软件")
    warning_flag = warning.warning(logger, db, flag, warning_key, arg, warning_interval)
    # Only mail when we actually composed a message (fixes the NameError path).
    if warning_flag and warning_msg is not None:
        mail.send(logger, warning_msg, sender_alias, receive, subject,
                  msg=f"{user}_limit {arg}")
def memory_analysis(log_file, log_level, warning_percent, warning_interval, notify_dict):
    """Warn when the most recent memory-usage sample exceeds warning_percent."""
    logger = log.Logger(log_file, log_level)
    db = database.db()
    row = db.query_one("select record_time, used_percent from memory order by record_time desc")
    mem_used_percent = float(row[1])
    logger.logger.debug("分析Mem...")
    flag = 1 if mem_used_percent > warning_percent else 0
    if flag:
        logger.logger.warning(f"内存当前使用率当前已达到{mem_used_percent}%")
    if warning.warning(logger, db, flag, "mem", "used_percent", warning_interval):
        notification.send(logger,
                          f"内存预警:\n内存当前使用率当前已达到{mem_used_percent}%",
                          notify_dict, msg='mem_used_percent')
def clean():
    """Start the blocking scheduler that purges old records daily at 01:10."""
    log_file, log_level = log.get_log_args()
    logger = log.Logger(log_file, log_level)
    logger.logger.info("清理程序启动...")
    keep_days = int(conf.get("autocheck", "keep_days")[0])
    scheduler = BlockingScheduler()
    scheduler.add_job(clean_data, 'cron', args=[logger, keep_days],
                      day_of_week='0-6', hour=1, minute=10, id='clean')
    scheduler.start()
def disk_analysis(log_file, log_level, warning_percent, warning_interval, notify_dict):
    """Warn for every mount whose latest usage is at or above warning_percent."""
    logger = log.Logger(log_file, log_level)
    db = database.db()
    sql = "select record_time, name, used_percent, mounted from disk where record_time=(select max(record_time) from disk)"
    rows = db.query_all(sql)
    logger.logger.debug("分析disk...")
    for record_time, name, used_percent, mounted in rows:
        flag = 0
        if used_percent >= warning_percent:
            flag = 1
            logger.logger.warning(f"{mounted}目录({name})已使用{used_percent}%")
        if warning.warning(logger, db, flag, "disk", mounted, warning_interval):
            warning_msg = f"磁盘预警:\n{mounted}目录({name})已使用{used_percent}%\n"
            notification.send(logger, warning_msg, notify_dict, msg=mounted)
def record(log_file, log_level, user):
    """Record one user's nofile/nproc ulimits via `su - <user> -c "ulimit -n -u"`."""
    logger = log.Logger(log_file, log_level)
    logger.logger.info(f"记录用户{user}的资源限制...")
    record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    cmd = f'su - {user} -c "ulimit -n -u"'
    status, message = subprocess.getstatusoutput(cmd)
    if status != 0:
        logger.logger.error(f"命令'{cmd}'执行报错")
        return
    # First output line is nofile, second is nproc; value is the last token.
    lines = message.splitlines()
    nofile = lines[0].split()[-1]
    nproc = lines[1].split()[-1]
    db = database.db()
    db.update_one("insert into users_limit values(?, ?, ?, ?)",
                  (record_time, user, nofile, nproc))
def cpu_analysis(log_file, log_level, warning_percent, warning_interval, notify_dict):
    """Warn when the most recent CPU sample reaches warning_percent."""
    logger = log.Logger(log_file, log_level)
    db = database.db()
    row = db.query_one("select record_time, cpu_used_percent from cpu order by record_time desc")
    cpu_used_percent = float(row[1])
    logger.logger.debug("分析CPU...")
    flag = 1 if cpu_used_percent >= warning_percent else 0
    if flag:
        logger.logger.warning(f"CPU当前使用率已达到{cpu_used_percent}%")
    if warning.warning(logger, db, flag, "cpu", "used_percent", warning_interval):
        notification.send(logger,
                          f"CPU预警:\nCPU使用率当前已达到{cpu_used_percent}%",
                          notify_dict, msg='cpu_used_percent')
def boot_time_record(log_file, log_level):
    """Record the host's boot time alongside the sampling timestamp."""
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.debug("记录服务器启动时间信息...")
    fmt = "%Y-%m-%d %H:%M:%S"
    record_time = datetime.datetime.now().strftime(fmt)
    boot_time = datetime.datetime.fromtimestamp(psutil.boot_time()).strftime(fmt)
    # Insert unconditionally; deduplicating unchanged boot times was
    # considered upstream but deliberately left disabled.
    db.update_one("insert into boot_time values(?, ?)", (record_time, boot_time))
def running_analysis(log_file, log_level, warning_interval, sender_alias, receive, subject):
    """Check whether MySQL is running; mail a warning when it is not."""
    logger = log.Logger(log_file, log_level)
    logger.logger.debug("开始分析MySQL运行情况...")
    db = database.db()
    sql = "select port, pid from mysql_constant where record_time=(select max(record_time) from mysql_constant)"
    port, pid = db.query_one(sql)
    if pid == 0:
        flag = 1
    else:
        flag = 0
    if warning.warning(logger, db, flag, "mysql", "running", warning_interval):
        mail.send(logger, f"MySQL预警:\nMySQL({port})未运行",
                  sender_alias, receive, subject, msg='mysql_running')
def tablespace_analysis(log_file, log_level, warning_percent, warning_interval, notify_dict):
    """Warn for each Oracle tablespace whose usage reaches warning_percent."""
    logger = log.Logger(log_file, log_level)
    db = database.db()
    sql = "select tablespace_name, used_percent from oracle where record_time=(select max(record_time) from oracle)"
    rows = db.query_all(sql)
    logger.logger.debug("分析表空间...")
    for name, used_percent in rows:
        flag = 0
        if used_percent >= warning_percent:
            flag = 1
            logger.logger.warning(f"{name}表空间已使用{used_percent}%")
        if warning.warning(logger, db, flag, "oracle", name, warning_interval):
            notification.send(logger,
                              f"Oracle表空间预警:\n{name}表空间已使用{used_percent}%",
                              notify_dict, msg=name)
def analysis(log_file, log_level, directory, warning_interval, notify_dict):
    """Warn about backup files.

    1. warn when the backup directory does not exist
    2. warn when today's backup file has not been generated
    3. warn when today's backup file is smaller than 99% of the previous one
    """
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.info(f"分析备份目录{directory}文件...")
    # Fetch the two most recent records for this directory.
    # NOTE(review): the DESC applies only to ctime; record_time sorts
    # ascending — confirm this really returns the two newest rows.
    sql = "select record_time, directory, filename, size, ctime from backup where directory=? order by record_time, ctime desc limit 2"
    data = db.query_all(sql, (directory, ))
    now_time = datetime.datetime.now().strftime("%Y-%m-%d")
    flag = 0  # whether there is a warning condition
    value = None  # warning sub-key: dir_is_None / file_is_None / file_is_small
    if len(data) < 2:
        # NOTE(review): assumes at least one row exists for this directory;
        # an empty table would raise IndexError — confirm record() always
        # runs before analysis().
        if data[0][2] is None:
            # filename is NULL => the recorder found no directory at all
            flag = 1
            value = "dir_is_None"
            warning_msg = f"备份预警:\n备份目录({directory})不存在"
        else:
            if now_time not in data[0][4]:
                flag = 1
                warning_msg = f"备份预警:\n备份目录({directory})当天备份文件未生成"
                value = "file_is_None"
    else:
        if now_time not in data[0][4]:
            flag = 1
            warning_msg = f"备份预警:\n备份目录({directory})当天备份文件未生成"
            value = "file_is_None"
        elif data[0][3] < data[1][3] * 0.99:
            # today's backup is noticeably smaller than the previous one
            flag = 1
            warning_msg = f"备份预警:\n备份目录({directory})当天备份文件({format_size(data[0][3])})与上一次({format_size(data[1][3])})相比相差较大"
            value = "file_is_small"
    warning_flag = warning.warning(logger, db, flag, f"backup {directory}", value, warning_interval)
    if warning_flag:
        # NOTE(review): warning_msg is unbound when flag stayed 0 — this
        # relies on warning.warning() never returning True for flag==0;
        # confirm that contract.
        notification.send(logger, warning_msg, notify_dict, msg=f"{directory} {value}")
def disk_record(log_file, log_level):
    """Record usage of every mounted partition into the `disk` table."""
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.debug("记录磁盘信息...")
    record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    rows = []
    for part in psutil.disk_partitions():
        device, mounted = part[0], part[1]
        # psutil.disk_usage() tuple order: (total, used, free, percent)
        usage = psutil.disk_usage(mounted)
        rows.append((record_time, device, usage[0], usage[1], usage[3],
                     usage[2], mounted))
    db.update_all("insert into disk values(?, ?, ?, ?, ?, ?, ?)", rows)
def record(log_file, log_level, directory, regular):
    """Record the matching backup files of one directory.

    Files in `directory` whose names end with `regular` are recorded with
    size and creation time; a single NULL row is written when the directory
    is missing or holds no matching file, so analysis can distinguish the
    cases. Old rows for the directory are replaced each run.
    """
    logger = log.Logger(log_file, log_level)
    logger.logger.info(f"记录备份目录{directory}的信息...")
    record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    backup_info = []
    if os.path.exists(directory):
        for entry in os.listdir(directory):
            filename = f"{directory}/{entry}"
            if os.path.isfile(filename) and filename.endswith(regular):
                size = os.path.getsize(filename)
                ctime = datetime.datetime.fromtimestamp(
                    os.path.getctime(filename)).strftime("%Y-%m-%d %H:%M:%S")
                backup_info.append((record_time, directory, entry, size, ctime))
    if not backup_info:
        # Missing directory or no matching file: placeholder row.
        backup_info.append((record_time, directory, None, None, None))
    db = database.db()
    db.update_one("delete from backup where directory=?", [directory])
    db.update_all("insert into backup values(?, ?, ?, ?, ?)", backup_info)
def record(log_file, log_level):
    """Record Oracle tablespace usage via sqlplus run as the oracle OS user.

    One row per tablespace is stored: (record_time, name, total bytes,
    used bytes, used percent, free bytes). On sqlplus failure a row is
    written to the `error` table instead.

    Bug fix: the error branch used "insert into error value(...)" —
    "values" was misspelled, so the fallback insert itself failed.
    """
    logger = log.Logger(log_file, log_level)
    logger.logger.debug("记录表空间信息")
    db = database.db()
    record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # sqlplus formatting is disabled so the output is parseable line by line.
    sql = """
    set heading off trimspool on feedback off pagesize 0 linesize 1000
    SELECT a.tablespace_name ,
           a.bytes,
           ( a.bytes - b.bytes ),
           b.bytes,
           Round(( ( a.bytes - b.bytes ) / a.bytes ) * 100, 2)
    FROM   (SELECT tablespace_name,
                   SUM(bytes) bytes
            FROM   dba_data_files
            GROUP  BY tablespace_name) a,
           (SELECT tablespace_name,
                   SUM(bytes) bytes,
                   Max(bytes) largest
            FROM   dba_free_space
            GROUP  BY tablespace_name) b
    WHERE  a.tablespace_name = b.tablespace_name
    ORDER  BY ( ( a.bytes - b.bytes ) / a.bytes ) DESC;
    """
    cmd = f"su - oracle -c 'sqlplus -S / as sysdba <<EOF\n{sql}\nEOF'"
    (status, message) = subprocess.getstatusoutput(cmd)
    if status == 0:
        data_list = []
        for i in message.splitlines():
            i = i.split()
            # query columns: name, total, used, free, used_percent ->
            # table order: (time, name, total, used, used_percent, free)
            data_list.append((record_time, i[0], i[1], i[2], i[4], i[3]))
        sql = "insert into oracle values(?, ?, ?, ?, ?, ?)"
        db.update_all(sql, data_list)
    else:
        # fixed: "values", not "value"
        sql = "insert into error values(?, ?, ?, ?, ?)"
        db.update_one(sql, (record_time, 'Oracle', 'connect', '无法连接Oracle', 0))
def record(log_file, log_level, mysql_user, mysql_ip, mysql_password, mysql_port):
    """Record MySQL process, resource and replication information.

    When the process is not running only placeholder rows are written;
    otherwise connects through pymysql and records memory/connection
    variables plus the node's master/slave replication state.
    """
    logger = log.Logger(log_file, log_level)
    db = database.db()
    record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    logger.logger.debug("记录MySQL信息")
    port = int(mysql_port)
    pid = tools.find_pid(port)
    if pid == 0:
        logger.logger.error(f"MySQL({port})未运行")
        sql = "insert into mysql_constant(record_time, pid, port, boot_time) values(?, ?, ?, ?)"
        db.update_one(sql, (record_time, pid, port, "0"))
        # keep the last known role, but mark its record_time as "0"
        role = db.query_one("select role from mysql_role")[0]
        db.update_one("update mysql_role set record_time=?, role=?", ("0", role))
    else:
        try:
            conn = pymysql.connect(
                host=mysql_ip,
                port=port,
                user=mysql_user,
                #cursorclass=pymysql.cursors.DictCursor,  # would return rows keyed by column name
                password=mysql_password)
            with conn.cursor() as cursor:
                mysql_info = psutil.Process(pid).as_dict()
                # mysql_constant row: process creation time
                mysql_create_time = datetime.datetime.fromtimestamp(
                    mysql_info["create_time"]).strftime("%Y-%m-%d %H:%M:%S")
                sql = "insert into mysql_constant values(?, ?, ?, ?)"
                db.update_one(sql, (record_time, pid, port, mysql_create_time))
                mysql_memory_percent = mysql_info['memory_percent']
                # absolute resident memory derived from total RAM * percent
                mysql_memory = psutil.virtual_memory(
                )[0] * mysql_memory_percent / 100
                # current client connection count
                sql = 'show status where variable_name in ("threads_connected")'
                cursor.execute(sql)
                mysql_connected_num = cursor.fetchone()[1]
                mysql_num_threads = mysql_info["num_threads"]
                sql = "insert into mysql_variable values(?, ?, ?, ?, ?, ?)"
                db.update_one(
                    sql, (record_time, pid, mysql_memory, mysql_memory_percent,
                          mysql_connected_num, mysql_num_threads))
                # replication role detection
                logger.logger.debug("记录MySQL集群信息...")
                sql = 'show slave status'
                cursor.execute(sql)
                slave_status = cursor.fetchall()
                if len(slave_status) == 0:
                    # no slave status => this node is a master
                    role = "master"
                    sql = 'show slave hosts'
                    cursor.execute(sql)
                    slave_num = len(cursor.fetchall())
                    sql = 'show master status'
                    cursor.execute(sql)
                    master_data = cursor.fetchone()
                    binlog_do_db = master_data[2]
                    binlog_ignore_db = master_data[3]
                    sql = 'replace into mysql_master values(?, ?, ?, ?, ?)'
                    db.update_one(sql, (record_time, pid, slave_num,
                                        binlog_do_db, binlog_ignore_db))
                else:
                    # slave: record one row per replication channel
                    role = "slave"
                    slave_list = []
                    for i in slave_status:
                        # NOTE(review): positional indexes assume a specific
                        # SHOW SLAVE STATUS column layout (GTID columns at
                        # 40/51/52 suggest MySQL 5.7/8.0) — confirm against
                        # the deployed server version.
                        master_host = i[1]
                        master_port = i[3]
                        replicate_do_db = i[12]
                        replicate_ignore_db = i[13]
                        slave_io_thread = i[10]
                        slave_io_state = i[0]
                        slave_sql_thread = i[11]
                        slave_sql_state = i[44]
                        master_uuid = i[40]
                        retrieved_gtid_set = i[51]
                        # flatten multi-line GTID sets for storage
                        executed_gtid_set = i[52].replace('\n', ' ', -1)
                        seconds_behind_master = i[32]
                        slave_list.append((record_time, pid, master_host, master_port, replicate_do_db, replicate_ignore_db, \
                                slave_io_thread, slave_io_state, slave_sql_thread, slave_sql_state, \
                                master_uuid, retrieved_gtid_set, executed_gtid_set, seconds_behind_master))
                    sql = 'insert into mysql_slave values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'
                    db.update_all(sql, slave_list)
                db.update_one("update mysql_role set record_time=?, role=?",
                              (record_time, role))
        except Exception as e:
            logger.logger.error(f"无法连接MySQL: {e}")
            sql = "insert into error values(?, ?, ?, ?, ?)"
            db.update_one(sql, (record_time, "MySQL", "connection", str(e), 0))
        else:
            # only close the connection when no exception occurred
            conn.close()
from django.http import HttpResponseNotFound from django.http import HttpResponseForbidden from django.views.defaults import server_error, bad_request from django.contrib.sites.shortcuts import get_current_site from forge_django.settings import base as settings from home.models import AutodeskAccounts from lib import log from lib import httpHelper from lib import forgeAdopter from lib import forgeCacheController # Global Variables logger = log.Logger() forgeDM = forgeAdopter.ForgeDataManagementAdopter( settings.ADSK_FORGE['FORGE_CLIENT_ID'], settings.ADSK_FORGE['FORGE_CLIENT_SECRET']) forgeCC = forgeCacheController.ForgeCacheController( settings.ADSK_FORGE['FORGE_CLIENT_ID'], settings.ADSK_FORGE['FORGE_CLIENT_SECRET']) forgeOA = forgeAdopter.ForgeOAuthAdopter( settings.ADSK_FORGE['FORGE_CLIENT_ID'], settings.ADSK_FORGE['FORGE_CLIENT_SECRET']) pp = pprint.PrettyPrinter(indent=4) http = httpHelper.HttpHelper() OAUTH_CALLBACK_URL = settings.ADSK_FORGE['FORGE_AUTH_CALLBACK'] ''' ====================================================================================================== View Utilities
def record():
    """Collector entry point: schedule every enabled recording job.

    Reads the [host]/[tomcat]/[redis]/[backup]/[mysql]/[oracle]/[matching]
    configuration sections, registers one APScheduler job per enabled
    resource, then blocks in scheduler.start().

    Bug fixes vs the original:
    * the matching-interval floor assigned the clamped value to
      matching_check instead of the interval, so sub-second intervals
      slipped through;
    * matching_dict was popped while being iterated directly, which raises
      RuntimeError when a configured file does not exist.
    """
    log_file, log_level = log.get_log_args()
    logger = log.Logger(log_file, log_level)
    logger.logger.info("开始采集资源信息...")
    max_threads = 50
    executors = {"default": ThreadPoolExecutor(max_threads)}
    job_defaults = {
        "coalesce": True,
        "max_instances": 1,
        "misfire_grace_time": 3,
    }
    scheduler = BlockingScheduler(job_defaults=job_defaults, executors=executors)
    min_value = 10  # floor (seconds) for any recording interval

    # --- host resources ---
    logger.logger.info("开始采集主机资源信息...")
    disk_interval, cpu_interval, memory_interval, swap_interval, users_limit = conf.get(
        "host", "disk_interval", "cpu_interval", "memory_interval",
        "swap_interval", "users_limit")
    disk_interval = max(int(disk_interval), min_value)
    cpu_interval = max(int(cpu_interval), min_value)
    memory_interval = max(int(memory_interval), min_value)
    swap_interval = max(int(swap_interval), min_value)
    logger.logger.info("开始采集磁盘资源信息...")
    scheduler.add_job(host.disk_record, 'interval', args=[log_file, log_level],
                      seconds=disk_interval, id='disk_record')
    logger.logger.info("开始采集CPU资源信息...")
    scheduler.add_job(host.cpu_record, 'interval', args=[log_file, log_level],
                      seconds=cpu_interval, id='cpu_record')
    logger.logger.info("开始采集内存资源信息...")
    scheduler.add_job(host.memory_record, 'interval', args=[log_file, log_level],
                      seconds=memory_interval, id='memory_record')
    logger.logger.info("开始采集Swap资源信息...")
    scheduler.add_job(host.swap_record, 'interval', args=[log_file, log_level],
                      seconds=swap_interval, id='swap_record')
    logger.logger.info("开始采集启动时间资源信息...")
    # boot time is recorded once at startup instead of on an interval
    host.boot_time_record(log_file, log_level)

    # --- per-user resource limits ---
    logger.logger.info("开始记录用户限制信息...")
    if users_limit is not None:
        for user in [u.strip() for u in users_limit.split(",")]:
            scheduler.add_job(user_resource.record, 'interval',
                              args=[log_file, log_level, user],
                              next_run_time=datetime.datetime.now() + datetime.timedelta(seconds=5),
                              minutes=60, id=f'{user}_limit')

    # --- tomcat ---
    tomcat_check, tomcat_interval, tomcat_port = conf.get(
        "tomcat", "check", "tomcat_interval", "tomcat_port")
    if tomcat_check == '1':
        logger.logger.info("开始采集Tomcat资源信息...")
        tomcat_port_list = [p.strip() for p in tomcat_port.split(",")]
        tomcat_interval = max(int(tomcat_interval), min_value)
        scheduler.add_job(tomcat.record, 'interval',
                          args=[log_file, log_level, tomcat_port_list],
                          seconds=tomcat_interval, id='tomcat_record')

    # --- redis ---
    redis_check, redis_interval, redis_password, redis_port, sentinel_port, sentinel_name, commands = conf.get(
        "redis", "check", "redis_interval", "password", "redis_port",
        "sentinel_port", "sentinel_name", "commands")
    if redis_check == "1":
        redis_interval = max(int(redis_interval), min_value)
        logger.logger.info("开始采集Redis资源信息...")
        scheduler.add_job(redis.record, 'interval',
                          args=[log_file, log_level, redis_password, redis_port,
                                sentinel_port, sentinel_name, commands],
                          seconds=redis_interval, id='redis_record')

    # --- backup ---
    backup_check, backup_dir, backup_regular, backup_cron_time = conf.get(
        "backup", "check", "dir", "regular", "cron_time")
    if backup_check == "1":
        logger.logger.info("开始记录备份信息...")
        dir_list = [d.strip() for d in backup_dir.split(",")]
        regular_list = [r.strip() for r in backup_regular.split(",")]
        cron_time_list = [c.strip() for c in backup_cron_time.split(",")]
        # NOTE: assumes the three comma-separated lists are the same length
        for i, directory in enumerate(dir_list):
            regular = regular_list[i]
            cron_time = cron_time_list[i].split(":")
            hour = cron_time[0].strip()
            minute = cron_time[1].strip()
            scheduler.add_job(backup.record, 'cron',
                              args=[log_file, log_level, directory, regular],
                              next_run_time=datetime.datetime.now(),
                              day_of_week='0-6', hour=int(hour),
                              minute=int(minute), id=f'backup{i}')

    # --- mysql ---
    mysql_check, mysql_interval, mysql_user, mysql_ip, mysql_port, mysql_password = conf.get(
        "mysql", "check", "mysql_interval", "mysql_user", "mysql_ip",
        "mysql_port", "mysql_password")
    if mysql_check == "1":
        mysql_interval = max(int(mysql_interval), min_value)
        logger.logger.info("开始采集MySQL资源信息...")
        scheduler.add_job(mysql.record, 'interval',
                          args=[log_file, log_level, mysql_user, mysql_ip,
                                mysql_password, mysql_port],
                          seconds=mysql_interval, id='mysql_record')

    # --- oracle ---
    oracle_check, oracle_interval = conf.get("oracle", "check", "oracle_interval")
    if oracle_check == "1":
        oracle_interval = max(int(oracle_interval), min_value)
        logger.logger.info("开始记录Oracle信息...")
        scheduler.add_job(oracle.record, 'interval', args=[log_file, log_level],
                          seconds=oracle_interval, id='oracle_record')

    # --- log keyword matching ---
    matching_check, matching_files, matching_keys, matching_interval = conf.get(
        "matching", "check", "matching_files", "matching_keys", "matching_interval")
    if matching_check == "1":
        matching_min_value = 1
        # BUG fix: original wrote "matching_check = matching_min_value" here,
        # leaving a too-small interval in effect.
        if int(matching_interval) < matching_min_value:
            matching_interval = matching_min_value
        logger.logger.info("开始采集匹配信息...")
        matching_dict = dict(
            zip([x.strip() for x in matching_files.split(",")],
                [x.strip() for x in matching_keys.split(",")]))
        record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        db = database.db()
        # iterate a snapshot of the keys: popping while iterating the dict
        # itself raises RuntimeError
        for matching_file in list(matching_dict):
            if os.path.exists(matching_file):
                sql = "insert into matching values(?, ?, ?, ?, ?)"
                filesize = os.stat(matching_file)[6]  # st_size
                db.update_one(sql, (record_time, matching_file,
                                    matching_dict[matching_file], "all", filesize))
            else:
                logger.logger.error(
                    f"Error: [matching]配置中文件{matching_file}不存在")
                matching_dict.pop(matching_file)
        scheduler.add_job(matching.matching_records, 'interval',
                          args=[log_file, log_level, matching_dict],
                          seconds=int(matching_interval), id=f'matching')
    scheduler.start()
def record(log_file, log_level, tomcat_port_list):
    """Record process/resource and jstat GC statistics for each Tomcat port.

    For every configured port the listening pid is looked up; a placeholder
    row is written when the instance is down, otherwise process stats plus
    one parsed `jstat -gcutil` sample are stored (jdk7 vs jdk8 tables are
    chosen by the jstat header width).
    """
    logger = log.Logger(log_file, log_level)
    db = database.db()
    tomcat_port_and_pid = {}
    for port in tomcat_port_list:
        tomcat_port_and_pid[port] = tools.find_pid(int(port))
    record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    for i in tomcat_port_and_pid:
        # fetch per-process information by pid
        pid = tomcat_port_and_pid[i]
        port = i
        logger.logger.debug(f"记录Tomcat({port})资源")
        if pid == 0:
            logger.logger.error(f"Tomcat({port})未运行")
            sql = "insert into tomcat_constant(record_time, pid, port, boot_time) values(?, ?, ?, ?)"
            db.update_one(sql, (record_time, pid, port, "0"))
        else:
            tomcat_info = psutil.Process(pid).as_dict()
            tomcat_create_time = datetime.datetime.fromtimestamp(
                tomcat_info["create_time"]).strftime("%Y-%m-%d %H:%M:%S")
            tomcat_cmdline = ",".join(tomcat_info["cmdline"])
            constant_data = (record_time, pid, port, tomcat_create_time,
                             tomcat_cmdline)
            constant_sql = "insert into tomcat_constant values(?, ?, ?, ?, ?)"
            db.update_one(constant_sql, constant_data)
            tomcat_memory_percent = tomcat_info['memory_percent']
            # absolute resident memory derived from total RAM * percent
            tomcat_memory = psutil.virtual_memory(
            )[0] * tomcat_info['memory_percent'] / 100
            tomcat_connections = len(tomcat_info["connections"])
            tomcat_num_threads = tomcat_info["num_threads"]
            variable_data = (record_time, pid, port, tomcat_memory,
                             tomcat_memory_percent, tomcat_connections,
                             tomcat_num_threads)
            variable_sql = "insert into tomcat_variable values(?, ?, ?, ?, ?, ?, ?)"
            db.update_one(variable_sql, variable_data)
            # JVM GC statistics via jstat
            logger.logger.debug(f"记录Tomcat({port})Jvm信息")
            cmd = f"jstat -gcutil {pid}"
            # NOTE(review): status is not checked here; a failing jstat would
            # make the header parsing below misbehave — confirm jstat is
            # always on PATH for the collector user.
            (status, message) = subprocess.getstatusoutput(cmd)
            message = message.splitlines()
            header = message[0].split()
            if len(header) == 11:
                # 11 header columns => jdk8 layout (has M and CCS)
                fields = [
                    "S0", "S1", "E", "O", "M", "CCS", "YGC", "YGCT", "FGC",
                    "FGCT", "GCT", "record_time", "port"
                ]
                sql = f"insert into tomcat_jstat8({','.join(fields)}) values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
                java_version = 8
            else:
                # jdk7 layout (P instead of M/CCS)
                fields = [
                    "S0", "S1", "E", "O", "P", "YGC", "YGCT", "FGC",
                    "FGCT", "GCT", "record_time", "port"
                ]
                sql = f"insert into tomcat_jstat7({','.join(fields)}) values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
                java_version = 7
            # persist the detected java version
            java_version_sql = "update tomcat_java_version set version=?"
            db.update_one(java_version_sql, (java_version, ))
            data_index_list = []
            # map each field (except record_time/port) to its column index in
            # the jstat header; NOTE: reuses loop variable `i` from the outer
            # port loop — harmless here because `port` was captured above
            for i in fields[:-2]:
                index = header.index(i)
                data_index_list.append(index)
            data_list = []
            # place the jstat values into data_list in field order
            data = message[1].split()
            for i in data_index_list:
                data_list.append(data[i])
            else:
                # for/else: always runs (no break) — append timestamp and port
                data_list.extend([record_time, port])
            db.update_one(sql, data_list)
def resource_show(hostname, check_dict, granularity_level, sender_alias, receive, subject):
    """Build the 24-hour resource report and mail it as a tar attachment.

    Renders host statistics (boot time, disk, CPU, memory, swap) and the
    optional subsystem sections (Tomcat, Redis, backups, MySQL, Oracle —
    each gated by *check_dict*) as PrettyTable text through printf() into
    the ./report directory, then tars the directory and sends it by mail.

    Args:
        hostname: host name printed in the report header.
        check_dict: per-subsystem switches and check intervals, e.g.
            check_dict['host_check'][0] is the disk check interval —
            assumed to be minutes per check; TODO confirm against config.
        granularity_level: row-sampling multiplier (larger = fewer rows).
        sender_alias, receive, subject: mail parameters (note: re-read
            from conf just before sending, as in the original code).
    """
    log_file, log_level = log.get_log_args()
    logger = log.Logger(log_file, log_level)
    db = database.db()
    now_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    modifier = "-24 hour"   # sqlite datetime() offset defining the report window
    message = ""            # extra note appended to the mail body (slow log flag)

    # Reset the report directory.
    report_dir = "report"
    shutil.rmtree(report_dir, ignore_errors=True)
    os.makedirs(report_dir, exist_ok=True)

    def sample_step(check_interval):
        """Row sampling step derived from the check interval, clamped to >= 1."""
        step = int(60 / int(check_interval) * granularity_level)
        return step if step != 0 else 1

    logger.logger.info("统计资源记录信息...")
    printf(f"统计开始时间: {now_time}")
    printf(f"主机名: {hostname}")
    printf("-" * 100)

    # System boot time.
    boot_time = db.query_one("select boot_time from boot_time order by record_time desc")[0]
    printf(f"系统启动时间: {boot_time}")
    printf("*" * 100)

    # Disk.
    logger.logger.info("统计Disk记录信息...")
    printf("磁盘统计:")
    disk_step = sample_step(check_dict['host_check'][0])
    for row in db.query_all("select distinct mounted from disk"):
        mounted = row[0]
        table = pt.PrettyTable(
            ["记录时间", "挂载点", "磁盘名称", "磁盘大小", "已使用大小", "已使用百分比", "可用"])
        sql = ("select record_time, name, total, used, used_percent, avail from disk "
               "where mounted=? "
               f"and record_time > datetime('{now_time}', '{modifier}') "
               "order by record_time")
        for index, item in enumerate(db.query_all(sql, (mounted, ))):
            if index % disk_step == 0:
                table.add_row((item[0], mounted, item[1], format_size(item[2]),
                               format_size(item[3]), f"{item[4]}%", format_size(item[5])))
        printf(f"{mounted}磁盘统计:")
        printf(table)
        printf("*" * 100)

    # CPU.
    logger.logger.info("统计CPU记录信息...")
    printf("CPU统计:")
    cpu_step = sample_step(check_dict['host_check'][1])
    table = pt.PrettyTable(["记录时间", "CPU核心数", "CPU使用率"])
    sql = ("select record_time, cpu_count, cpu_used_percent from cpu "
           f"where record_time > datetime('{now_time}', '{modifier}') "
           "order by record_time")
    for index, item in enumerate(db.query_all(sql)):
        if index % cpu_step == 0:
            table.add_row((item[0], item[1], f"{item[2]}%"))
    printf(table)
    printf("*" * 100)

    # Memory.
    logger.logger.info("统计Mem记录信息...")
    printf("内存统计:")
    mem_step = sample_step(check_dict['host_check'][2])
    table = pt.PrettyTable(
        ["记录时间", "内存大小", "可用(avail)", "已使用", "已使用百分比", "剩余(free)"])
    sql = ("select record_time, total, avail, used, used_percent, free from memory "
           f"where record_time > datetime('{now_time}', '{modifier}') "
           "order by record_time")
    for index, item in enumerate(db.query_all(sql)):
        if index % mem_step == 0:
            table.add_row((item[0], format_size(item[1]), format_size(item[2]),
                           format_size(item[3]), f"{item[4]}%", format_size(item[5])))
    printf(table)
    printf("*" * 100)

    # Swap.
    logger.logger.info("统计Swap记录信息...")
    printf("Swap统计:")
    swap_step = sample_step(check_dict['host_check'][3])
    table = pt.PrettyTable(["记录时间", "Swap大小", "已使用", "已使用百分比", "剩余"])
    sql = ("select record_time, total, used, used_percent, free from swap "
           f"where record_time > datetime('{now_time}', '{modifier}') "
           "order by record_time")
    for index, item in enumerate(db.query_all(sql)):
        if index % swap_step == 0:
            table.add_row((item[0], format_size(item[1]), format_size(item[2]),
                           f"{item[3]}%", format_size(item[4])))
    printf(table)
    printf("*" * 100)

    # Tomcat.
    if check_dict["tomcat_check"][0] == "1":
        logger.logger.info("统计Tomcat记录信息...")
        printf("Tomcat统计:")
        tomcat_step = sample_step(check_dict['tomcat_check'][1])
        version = db.query_one("select version from tomcat_java_version")[0]
        printf(f"Java版本: {version}")
        printf("*" * 100)
        tomcat_ports = conf.get("tomcat", "tomcat_port")[0].split(",")
        for port_str in tomcat_ports:
            port = int(port_str.strip())
            constant_sql = ("select record_time, pid, port, boot_time, cmdline from tomcat_constant "
                            "where port=? "
                            f"and '{now_time}' >= record_time "
                            "order by record_time desc")
            variable_sql = ("select record_time, pid, men_used, mem_used_percent, connections, threads_num from tomcat_variable "
                            "where port=? "
                            f"and record_time > datetime('{now_time}', '{modifier}') "
                            "order by record_time")
            # BUG FIX: the recorder stores the version as an integer, so a
            # plain `version == "8"` could match neither branch and leave
            # jvm_sql/jvm_table unbound — compare the string form instead.
            if str(version) == "8":
                jvm_sql = ("select record_time, S0, S1, E, O, M, CCS, YGC, YGCT, FGC, FGCT, GCT from tomcat_jstat8 "
                           "where port=? "
                           f"and record_time > datetime('{now_time}', '{modifier}') "
                           "order by record_time")
                jvm_table = pt.PrettyTable([
                    "记录时间", "S0", "S1", "E", "O", "M", "CCS", "YGC",
                    "YGCT", "FGC", "FGCT", "GCT"
                ])
            else:
                # jdk7 layout (P column instead of M/CCS).
                jvm_sql = ("select record_time, S0, S1, E, O, P, YGC, YGCT, FGC, FGCT, GCT from tomcat_jstat7 "
                           "where port=? "
                           f"and record_time > datetime('{now_time}', '{modifier}') "
                           "order by record_time")
                jvm_table = pt.PrettyTable([
                    "记录时间", "S0", "S1", "E", "O", "P", "YGC", "YGCT",
                    "FGC", "FGCT", "GCT"
                ])
            constant_table = pt.PrettyTable(["记录时间", "Pid", "端口", "启动时间", "启动参数"])
            constant_table.add_row(db.query_one(constant_sql, (port, )))
            variable_table = pt.PrettyTable(["记录时间", "Pid", "内存使用", "内存使用率", "连接数", "线程数"])
            for index, item in enumerate(db.query_all(variable_sql, (port, ))):
                if index % tomcat_step == 0:
                    variable_table.add_row((item[0], item[1], format_size(item[2]),
                                            f"{item[3]:.2f}%", item[4], item[5]))
            for index, item in enumerate(db.query_all(jvm_sql, (port, ))):
                if index % tomcat_step == 0:
                    jvm_table.add_row(item)
            printf(f"Tomcat({port})统计信息:")
            printf("启动信息:")
            printf(constant_table)
            printf("运行信息:")
            printf(variable_table)
            printf("Jvm内存信息:")
            printf(jvm_table)
            printf("*" * 100)

    # Redis.
    if check_dict["redis_check"][0] == "1":
        logger.logger.info("统计Redis记录信息...")
        printf("Redis统计:")
        redis_step = sample_step(check_dict['redis_check'][1])
        printf("*" * 100)
        constant_sql = ("select record_time, pid, port, boot_time from redis_constant "
                        f"where '{now_time}' >= record_time "
                        "order by record_time desc")
        variable_sql = ("select record_time, pid, mem_used, mem_used_percent, connections, threads_num from redis_variable "
                        f"where record_time > datetime('{now_time}', '{modifier}') "
                        "order by record_time")
        constant_table = pt.PrettyTable(["记录时间", "Pid", "端口", "启动时间"])
        constant_table.add_row(db.query_one(constant_sql))
        variable_table = pt.PrettyTable(["记录时间", "Pid", "内存使用", "内存使用率", "连接数", "线程数"])
        # BUG FIX: this loop previously sampled with tomcat_granularity_level,
        # a copy-paste error that raised NameError whenever the Tomcat
        # section was disabled (and sampled wrongly otherwise).
        for index, item in enumerate(db.query_all(variable_sql)):
            if index % redis_step == 0:
                variable_table.add_row((item[0], item[1], format_size(item[2]),
                                        f"{item[3]:.2f}%", item[4], item[5]))
        # Replication info depends on the recorded role.
        role = db.query_one("select role from redis_role")[0]
        if role == "master":
            master_slave_sql = "select a.record_time, connected_slave, slave_ip, slave_port, slave_state from redis_master a ,redis_slaves_info b on a.record_time=b.record_time where a.record_time=(select max(record_time) from redis_master)"
            master_slave_table = pt.PrettyTable(["记录时间", "Slave数量", "Slave IP", "Slave端口", "Slave状态"])
            for item in db.query_all(master_slave_sql):
                master_slave_table.add_row(item)
        elif role == "slave":
            master_slave_sql = "select record_time, pid, master_host, master_port, master_link_status from redis_slave order by record_time desc"
            master_slave_table = pt.PrettyTable(["记录时间", "Pid", "master主机", "master端口", "与master连接状态"])
            master_slave_table.add_row(db.query_one(master_slave_sql))
        # Sentinel view of the topology.
        sentinel_sql = "select a.record_time, role, host, a.port from redis_sentinel a, redis_constant b on a.record_time=b.record_time where b.record_time=(select max(record_time) from redis_constant)"
        sentinel_table = pt.PrettyTable(["记录时间", "角色", "IP", "端口"])
        for item in db.query_all(sentinel_sql):
            sentinel_table.add_row(item)
        printf("启动信息:")
        printf(constant_table)
        printf("运行信息:")
        printf(variable_table)
        printf("集群信息:")
        printf(f"当前角色: {role}")
        printf(master_slave_table)
        printf("Sentinel监控信息:")
        printf(sentinel_table)
        printf("*" * 100)

    # Backups.
    if check_dict["backup_check"] == "1":
        logger.logger.info("统计备份记录信息...")
        printf("备份统计:")
        for entry in conf.get("backup", "dir")[0].split(","):
            directory = entry.strip()
            table = pt.PrettyTable(["记录时间", "备份文件", "大小", "创建时间"])
            # BUG FIX: a space was missing between "directory=?" and
            # "order by", yielding invalid SQL ("...=?order by ctime").
            sql = ("select record_time, filename, size, ctime from backup "
                   "where directory=? "
                   "order by ctime")
            for item in db.query_all(sql, (directory, )):
                if item[2] is not None:
                    table.add_row((item[0], item[1], format_size(item[2]), item[3]))
            printf(f"备份({directory})统计信息:")
            printf(table)
            printf("*" * 100)

    # MySQL.
    if check_dict["mysql_check"][0] == "1":
        logger.logger.info("统计MySQL记录信息...")
        printf("MySQL统计:")
        mysql_step = sample_step(check_dict['mysql_check'][1])
        printf("*" * 100)
        constant_sql = ("select record_time, pid, port, boot_time from mysql_constant "
                        f"where '{now_time}' >= record_time "
                        "order by record_time desc")
        variable_sql = ("select record_time, pid, mem_used, mem_used_percent, connections, threads_num from mysql_variable "
                        f"where record_time > datetime('{now_time}', '{modifier}') "
                        "order by record_time")
        constant_table = pt.PrettyTable(["记录时间", "Pid", "端口", "启动时间"])
        constant_table.add_row(db.query_one(constant_sql))
        variable_table = pt.PrettyTable(["记录时间", "Pid", "内存使用", "内存使用率", "连接数", "线程数"])
        for index, item in enumerate(db.query_all(variable_sql)):
            if index % mysql_step == 0:
                variable_table.add_row((item[0], item[1], format_size(item[2]),
                                        f"{item[3]:.2f}%", item[4], item[5]))
        # Replication info depends on the recorded role.
        role = db.query_one("select role from mysql_role")[0]
        if role == "master":
            master_slave_sql = "select record_time, pid, slave_num, binlog_do_db, binlog_ignore_db from mysql_master order by record_time desc"
            master_slave_table = pt.PrettyTable(
                ["记录时间", "Pid", "Slave数量", "Binlog_do_db", "Binlog_ignore_db"])
        elif role == "slave":
            master_slave_sql = ("select record_time, pid, master_host, master_port, replicate_do_db, replicate_ignore_db, "
                                "slave_io_thread, slave_io_state, slave_sql_thread, slave_sql_state, "
                                "master_uuid, retrieved_gtid_set, executed_gtid_set, seconds_behind_master "
                                "from mysql_slave order by record_time desc")
            master_slave_table = pt.PrettyTable([
                "记录时间", "Pid", "Master主机", "Master端口", "同步数据库", "非同步数据库",
                "Slave_IO线程", "Slave_IO状态", "Slave_SQL线程", "Slave_SQL状态",
                "Master_UUID", "已接收的GTID集合", "已执行的GTID集合", "Slave落后Master的秒数"
            ])
        master_slave_data = db.query_one(master_slave_sql)
        if master_slave_data is not None:
            master_slave_table.add_row(master_slave_data)
        printf("启动信息:")
        printf(constant_table)
        printf("运行信息:")
        printf(variable_table)
        printf("集群信息:")
        printf(f"当前角色: {role}")
        printf(master_slave_table)
        printf("*" * 100)
        # Slow query log export (attached to the report when present).
        printf("慢日志信息:")
        mysql_user, mysql_ip, mysql_port, mysql_password = conf.get(
            "mysql", "mysql_user", "mysql_ip", "mysql_port", "mysql_password")
        mysql_flag, msg = mysql.export_slow_log(
            logger, mysql_user, mysql_ip, mysql_password, mysql_port,
            f"{report_dir}/slow_analysis.log", f"{report_dir}/slow.log")
        if mysql_flag == 1:
            message = "该附件存在MySQL慢日志"
        printf(msg)
        printf("*" * 100)

    # Oracle tablespaces.
    if check_dict["oracle_check"][0] == "1":
        logger.logger.info("统计Oracle表空间记录信息...")
        printf("Oracle表空间统计:")
        oracle_step = sample_step(check_dict['oracle_check'][1])
        for row in db.query_all("select distinct tablespace_name from oracle"):
            tablespace = row[0]
            table = pt.PrettyTable(
                ["记录时间", "表空间名称", "表空间大小", "已使用", "已使用百分比", "可用"])
            sql = ("select record_time, size, used, used_percent, free from oracle "
                   "where tablespace_name=? "
                   f"and record_time > datetime('{now_time}', '{modifier}') "
                   "order by record_time")
            for index, item in enumerate(db.query_all(sql, (tablespace, ))):
                if index % oracle_step == 0:
                    table.add_row((item[0], tablespace, format_size(item[1]),
                                   format_size(item[2]), f"{item[3]}%", format_size(item[4])))
            printf(f"{tablespace}表空间统计:")
            printf(table)
            printf("*" * 100)
        # AWR report.
        logger.logger.info("生成awr报告...")
        printf("awr报告信息:")
        awr_hours = conf.get("oracle", "awr_hours")[0]
        if oracle.generate_awr(int(awr_hours), report_dir) == 0:
            printf("请在附件中查看awr.html文件")
        else:
            printf("生成awr报告失败, 请自行手动生成")

    logger.logger.info("统计资源结束...")
    printf("-" * 100)
    end_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    printf(f"统计结束时间: {end_time}")

    # Package the report directory and mail it.
    tar_file = tar_report(logger, report_dir)
    sender_alias, receive, subject = conf.get("mail", "sender", "receive", "subject")
    warning_msg = f"\n请查看统计报告.\n\n{message}"
    mail.send(logger, warning_msg, sender_alias, receive,
              subject, msg="report", attachment_file=tar_file)
    # (Continuation of a pid-file lookup helper whose `def` line lies above
    # this chunk.) Returns the pid stored in pid_file only if that process
    # is alive and its command line looks like this program
    # ("python ... main.py"); otherwise returns None.
    if os.path.exists(pid_file):
        with open(pid_file, "r", encoding="utf8") as f:
            pid = f.read()
        if pid != '':
            pid = int(pid)
            if psutil.pid_exists(pid):
                # Join argv with commas so substring checks below are simple.
                cmdline = ",".join(psutil.Process(pid).cmdline())
                if "python" in cmdline and "main.py" in cmdline:
                    return pid
    return None


if __name__ == "__main__":
    # Entry point: exactly one action argument is expected; it is passed
    # to usage() for dispatch, otherwise the usage text is shown.
    if len(sys.argv) == 2:
        # Work relative to the script directory so ./conf and ./logs resolve.
        rootdir = os.path.dirname(__file__)
        os.chdir(rootdir)
        cfg = configparser.ConfigParser()
        cfg.read("./conf/autocheck.conf")
        os.makedirs("./logs", exist_ok=True)
        log_file = get_config(cfg, "logs", "log_file")
        log_level = get_config(cfg, "logs", "log_level")
        try:
            logger = log.Logger(log_file, log_level)
        except Exception as e:
            # Logger construction failing (bad path/level) is fatal.
            print(f"Error: {e}")
            exit()
        action = sys.argv[1]
        usage(action)
    else:
        usage("usage")
def analysis():
    """Register all periodic analysis jobs and block on the scheduler.

    Reads mail settings, warning thresholds and per-subsystem intervals
    from conf, registers one APScheduler interval job per enabled
    subsystem (plus cron jobs for backup checks), then starts a
    BlockingScheduler — this function does not return.
    """
    log_file, log_level = log.get_log_args()
    logger = log.Logger(log_file, log_level)
    logger.logger.info("开始分析资源信息...")
    sender_alias, receive, subject = conf.get("mail", "sender", "receive", "subject")
    warning_percent, warning_interval, analysis_interval = conf.get(
        "autocheck", "warning_percent", "warning_interval", "analysis_interval")
    disk_interval, cpu_interval, memory_interval = conf.get(
        "host", "disk_interval", "cpu_interval", "memory_interval")
    warning_percent = float(warning_percent)
    warning_interval = int(warning_interval)
    analysis_interval = int(analysis_interval)
    # Each analysis job runs analysis_interval seconds after its recorder.
    disk_interval = int(disk_interval) + analysis_interval
    cpu_interval = int(cpu_interval) + analysis_interval
    memory_interval = int(memory_interval) + analysis_interval

    max_threads = 20
    executors = {
        "default": ThreadPoolExecutor(max_threads)
    }
    job_defaults = {
        "coalesce": True,
        "max_instances": 1,
        "misfire_grace_time": 3,
    }
    scheduler = BlockingScheduler(job_defaults=job_defaults, executors=executors)

    # Host resources.
    logger.logger.info("开始分析主机资源信息...")
    host_args = [log_file, log_level, warning_percent, warning_interval,
                 sender_alias, receive, subject]
    scheduler.add_job(host.disk_analysis, 'interval', args=host_args,
                      seconds=disk_interval, id='disk_ana')
    scheduler.add_job(host.cpu_analysis, 'interval', args=host_args,
                      seconds=cpu_interval, id='cpu_ana')
    scheduler.add_job(host.memory_analysis, 'interval', args=host_args,
                      seconds=memory_interval, id='mem_ana')

    # User resource limits (first run delayed 15s, then every 65 minutes).
    logger.logger.info("开始分析用户资源信息...")
    scheduler.add_job(user_resource.analysis, 'interval',
                      args=[log_file, log_level, 0, sender_alias, receive, subject],
                      next_run_time=datetime.datetime.now() + datetime.timedelta(seconds=15),
                      minutes=65, id='user_limit_ana')

    # Tomcat.
    tomcat_check = conf.get("tomcat", "check")[0]
    if tomcat_check == '1':
        tomcat_interval = conf.get("tomcat", "tomcat_interval")[0]
        tomcat_interval = int(tomcat_interval) + analysis_interval
        logger.logger.info("开始分析Tomcat资源信息...")
        tomcat_args = [log_file, log_level, warning_interval, sender_alias, receive, subject]
        scheduler.add_job(tomcat.running_analysis, 'interval', args=tomcat_args,
                          seconds=tomcat_interval, id='tomcat_run_ana')
        scheduler.add_job(tomcat.jvm_analysis, 'interval', args=tomcat_args,
                          seconds=tomcat_interval, id='tomcat_jvm_ana')

    # Redis.
    redis_check = conf.get("redis", "check")[0]
    if redis_check == "1":
        redis_interval = conf.get("redis", "redis_interval")[0]
        redis_interval = int(redis_interval) + analysis_interval
        logger.logger.info("开始分析Redis资源信息...")
        redis_args = [log_file, log_level, warning_interval, sender_alias, receive, subject]
        scheduler.add_job(redis.running_analysis, 'interval', args=redis_args,
                          seconds=redis_interval, id='redis_run_ana')
        scheduler.add_job(redis.master_slave_analysis, 'interval', args=redis_args,
                          seconds=redis_interval, id='redis_slave_ana')

    # MySQL.
    mysql_check = conf.get("mysql", "check")[0]
    if mysql_check == "1":
        mysql_interval, seconds_behind_master = conf.get(
            "mysql", "mysql_interval", "seconds_behind_master")
        mysql_interval = int(mysql_interval) + analysis_interval
        logger.logger.info("开始分析MySQL资源信息...")
        scheduler.add_job(mysql.running_analysis, 'interval',
                          args=[log_file, log_level, warning_interval,
                                sender_alias, receive, subject],
                          seconds=mysql_interval, id='mysql_run_ana')
        scheduler.add_job(mysql.master_slave_analysis, 'interval',
                          args=[log_file, log_level, int(seconds_behind_master),
                                warning_interval, sender_alias, receive, subject],
                          seconds=mysql_interval, id='mysql_slave_ana')

    # Oracle.
    oracle_check = conf.get("oracle", "check")[0]
    if oracle_check == "1":
        oracle_interval = conf.get("oracle", "oracle_interval")[0]
        oracle_interval = int(oracle_interval) + analysis_interval
        logger.logger.info("开始分析Oracle信息...")
        scheduler.add_job(oracle.tablespace_analysis, 'interval',
                          args=[log_file, log_level, warning_percent, warning_interval,
                                sender_alias, receive, subject],
                          seconds=oracle_interval, id='oracle_tablespace_ana')

    # Backups: one cron job per directory, scheduled one minute after the
    # configured backup time (HH:MM).
    backup_check, backup_dir, backup_cron_time = conf.get(
        "backup", "check", "dir", "cron_time")
    if backup_check == "1":
        dir_list = [d.strip() for d in backup_dir.split(",")]
        cron_time_list = [t.strip() for t in backup_cron_time.split(",")]
        for i, (directory, cron_time) in enumerate(zip(dir_list, cron_time_list)):
            hour, minute = cron_time.split(":")
            run_minute = int(minute.strip()) + 1
            run_hour = int(hour.strip())
            # BUG FIX: "minute + 1" overflowed to 60 for HH:59 schedules,
            # which APScheduler rejects — carry into the hour instead.
            if run_minute == 60:
                run_minute = 0
                run_hour = (run_hour + 1) % 24
            scheduler.add_job(backup.analysis, 'cron',
                              args=[log_file, log_level, directory, 0,
                                    sender_alias, receive, subject],
                              day_of_week='0-6', hour=run_hour, minute=run_minute,
                              id=f'backup{i}_ana')

    scheduler.start()
def record(log_file, log_level, redis_password, redis_port, sentinel_port, sentinel_name, commands):
    """Record one snapshot of Redis state into the local database.

    Writes redis_constant/redis_variable rows plus replication info
    (redis_master + redis_slaves_info, or redis_slave) derived from
    INFO, and — when a sentinel port is configured — the sentinel's view
    of master and slaves. Connection failures are stored in the `error`
    table instead of raising.

    Args:
        log_file: log file path handed to log.Logger.
        log_level: log level handed to log.Logger.
        redis_password: auth password for the local Redis instance.
        redis_port: Redis port (str or int; connected on 127.0.0.1).
        sentinel_port: sentinel port, or None to skip sentinel recording.
        sentinel_name: master name as configured in sentinel.
        commands: not used by the active code — only referenced inside the
            disabled custom-command block below.
    """
    logger = log.Logger(log_file, log_level)
    db = database.db()
    logger.logger.debug("记录Redis资源")
    record_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    port = int(redis_port)
    pid = tools.find_pid(port)
    # Connection health flag: 1 = reachable, 0 = connect/ping failed.
    normal = 1
    if pid == 0:
        # Redis is not running: store a placeholder row; role falls back
        # to "master" with a zeroed record_time.
        logger.logger.error(f"Redis({port})未运行")
        sql = "insert into redis_constant(record_time, pid, port, boot_time) values(?, ?, ?, ?)"
        db.update_one(sql, (record_time, pid, port, "0"))
        db.update_one("update redis_role set record_time=?, role=?", ("0", "master"))
    else:
        try:
            conn = Redis(host="127.0.0.1", port=redis_port, password=redis_password)
            conn.ping()
        except Exception as e:
            normal = 0
            msg = e
        if normal == 1:
            redis_info = conn.info()
            redis_psutil_info = psutil.Process(pid).as_dict()
            boot_time = datetime.datetime.fromtimestamp(redis_psutil_info["create_time"]).strftime("%Y-%m-%d %H:%M:%S")
            sql = "insert into redis_constant(record_time, pid, port, boot_time) values(?, ?, ?, ?)"
            db.update_one(sql, (record_time, pid, redis_port, boot_time))
            redis_memory_percent = redis_psutil_info['memory_percent']
            # Absolute memory = total physical memory * process percentage.
            redis_memory = psutil.virtual_memory()[0] * redis_memory_percent / 100
            redis_connections = redis_info['connected_clients']
            redis_num_threads = redis_psutil_info['num_threads']
            sql = "insert into redis_variable values(?, ?, ?, ?, ?, ?)"
            db.update_one(sql, (record_time, pid, redis_memory, redis_memory_percent, redis_connections, redis_num_threads))
            # Replication info from INFO, keyed by the reported role.
            logger.logger.debug("记录Redis集群资源")
            role = redis_info['role']
            db.update_one("update redis_role set record_time=?, role=?", (record_time, role))
            if role == "master":
                connected_slaves = redis_info['connected_slaves']
                sql = "replace into redis_master values(?, ?, ?, ?)"
                db.update_one(sql, (record_time, pid, role, connected_slaves))
                slaves_list = []
                if connected_slaves != 0:
                    # INFO exposes each replica under keys "slave0", "slave1", ...
                    for i in range(connected_slaves):
                        slave = f"slave{i}"
                        slaves_list.append((record_time, redis_info[slave]['ip'], redis_info[slave]['port'], redis_info[slave]['state']))
                    sql = "replace into redis_slaves_info values(?, ?, ?, ?)"
                    db.update_all(sql, slaves_list)
            elif role == "slave":
                sql = "replace into redis_slave values(?, ?, ?, ?, ?, ?)"
                db.update_one(sql, (record_time, pid, role, redis_info['master_host'], redis_info['master_port'], redis_info['master_link_status']))
            """显示自定义命令
            if commands is not None:
                printf("-"*40)
                printf("自定义命令查询:")
                commands_list=commands.split(",")
                for command in commands_list:
                    command=command.strip()
                    result=conn.execute_command(command)
                    printf(f"{command} 结果: {result}")
            """
            conn.close()
        elif normal == 0:
            # Connection failed: record the exception in the error table.
            logger.logger.error(f"无法连接redis: {msg}")
            sql = "insert into error values(?, ?, ?, ?, ?)"
            db.update_one(sql, (record_time, "Redis", "connection", str(msg), 0))
    # Sentinel information (skipped when no sentinel port is configured).
    if sentinel_port is not None:
        logger.logger.debug(f"记录Redis Sentinel信息...")
        conn = sentinel.Sentinel(
            [('127.0.0.1', sentinel_port)],
            socket_timeout=1
        )
        try:
            sentinel_info = []
            master = conn.discover_master(sentinel_name)
            sentinel_info.append((record_time, 'master', master[0], master[1]))
            slaves = conn.discover_slaves(sentinel_name)
            for i in slaves:
                sentinel_info.append((record_time, 'slave', i[0], i[1]))
            sql = "replace into redis_sentinel values(?, ?, ?, ?)"
            db.update_all(sql, sentinel_info)
        except Exception as e:
            logger.logger.error(f"Redis Sentinel无法连接...")
            sql = "insert into error values(?, ?, ?, ?, ?)"
            db.update_one(sql, (record_time, 'Sentinel', "connection", str(e), 0))