Ejemplo n.º 1
0
 def checkslave(self):
     """Check MySQL replication health and return an alarm message.

     Runs ``show slave status`` through the mysql client configured on the
     instance (``self.mysql``, ``self.user``, ``self.passwd``, ``self.sock``)
     and inspects three fields of the output:

     * ``Slave_IO_Running`` / ``Slave_SQL_Running`` -- a value of ``No`` is
       reported as a replication failure and stops the scan.
     * ``Seconds_Behind_Master`` -- a value greater than
       ``self.rep_delay_time`` is reported as replication delay.  A
       non-numeric value (MySQL prints ``NULL`` when the delay cannot be
       computed) is reported as a replication failure instead of raising
       ``ValueError``.

     Every problem found is also pushed onto ``base.MQ``.

     Returns:
         str: the concatenated alarm text, or ``''`` when nothing is wrong.
     """
     msg = ''
     # NOTE(review): the command line is assembled by string interpolation
     # and executed through the shell, and the password is passed as an
     # argument. Confirm these values only ever come from trusted config.
     cmd = '%s -u%s -p%s -S %s -e "show slave status\\G;"' % (
         self.mysql, self.user, self.passwd, self.sock)
     proc = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             shell=True,
                             stderr=subprocess.PIPE)
     # communicate() drains both pipes together. Reading stderr to EOF
     # before touching stdout can block forever once the child has filled
     # the stdout pipe buffer.
     stdout_data, err = proc.communicate()
     out = stdout_data.splitlines()
     if err or (not out):
         base.MQ.put("%s [ERROR] mysql -e执行异常!stderr:%s" %
                     (base.TIME(), err.strip()))
         msg = '%sMysql -e执行异常!' % msg
     for line in out:
         field = line.strip()
         if field.startswith(('Slave_IO_Running:', 'Slave_SQL_Running:')):
             if field.split(':')[1].strip() == 'No':
                 base.MQ.put("%s [ERROR] Mysql主从复制失败!%s" %
                             (base.TIME(), field))
                 msg = '%sMysql主从复制失败' % msg
                 break
         elif field.startswith('Seconds_Behind_Master:'):
             raw_delay = field.split(':')[1].strip()
             try:
                 count = int(raw_delay)
             except ValueError:
                 # e.g. "NULL": the delay is unknown, so replication is not
                 # healthy even though neither thread reported "No".
                 base.MQ.put("%s [ERROR] Mysql主从复制失败!%s" %
                             (base.TIME(), field))
                 msg = '%sMysql主从复制失败' % msg
                 break
             # NOTE(review): if rep_delay_time is still the raw config
             # string, this int/str comparison never fires on Python 2 --
             # confirm the attribute is stored as a number.
             if count > self.rep_delay_time:
                 base.MQ.put("%s [ERROR] Mysql主从复制延迟!%s" %
                             (base.TIME(), field))
                 msg = '%sMysql主从复制延迟%s' % (msg, count)
     return msg
Ejemplo n.º 2
0
	def check(self):
		"""Scan newly appended /var/log/secure lines for unexpected logins.

		Reads whatever was appended to the log since the previous call. For
		each line containing 'Accepted' whose timestamp is less than 10
		seconds old, every IP matched by ``self.pattern`` that is not in
		``self.access_ip_list`` is logged as a warning. The offending IPs are
		written to ``base.ALARM_DICT['sec'][0]``.

		If the file is smaller than the remembered offset it is treated as
		rotated: the handle is reopened at offset 0 and the call returns
		without reading (the new content is picked up by the next call).

		Returns:
			None. Any unexpected error is logged to ``base.MQ``.
		"""
		count = []
		try:
			pos = os.path.getsize('/var/log/secure')
			if self.f is None:
				# No handle yet (the attribute is None when the log could
				# not be opened earlier): open it now instead of failing on
				# every call with an AttributeError.
				self.f = open('/var/log/secure')
				self.f_pos = 0
			if pos < self.f_pos:
				# File shrank => rotated or truncated; start over.
				self.close()
				self.f = open('/var/log/secure')
				self.f_pos = 0
				self.f.seek(self.f_pos)
				return
			records = self.f.readlines()
			if records:
				self.f_pos = os.path.getsize('/var/log/secure')
			# syslog timestamps carry no year, so the current one is appended.
			timestamp_year = base.time.strftime('%Y',base.time.localtime())
			for record in records:
				if 'Accepted' not in record:
					continue
				base.MQ.put("%s [DEBUG] CheckLogin Accepted记录:%s"%(base.TIME(),record))
				timestamp_log = "%s%s"%(record[:16],timestamp_year)
				try:
					timestamp_log_sec = base.time.mktime(base.time.strptime(timestamp_log,"%b %d %H:%M:%S %Y"))
				except ValueError as e:
					# One unparsable line must not discard the rest of the batch.
					base.MQ.put("%s [ERROR] 读取日志文件:/var/log/secure 出错!%s"%(base.TIME(),e))
					continue
				# Only logins from the last 10 seconds are reported; the age
				# is the same for every IP found on the line.
				if (base.time.time() - timestamp_log_sec) >= 10:
					continue
				for i in self.pattern.findall(record):
					if i not in self.access_ip_list:
						base.MQ.put("%s [WARNING] IP:%s login!"%(base.TIME(),i))
						count.append(i)
			if count:
				base.ALARM_DICT['sec'][0] = "IP:%s异常登陆!"%(','.join(count))
		except Exception as e:
			base.MQ.put("%s [ERROR] 读取日志文件:/var/log/secure 出错!%s"%(base.TIME(),e))
Ejemplo n.º 3
0
	def process_default(self,event):
		"""Handle a file-system event and record modifications/deletions.

		Events for editor temp files (names ending in .swp, .swx, .swpx, .swo
		or '~') and for any name containing 'prelink' are ignored. An
		IN_MODIFY or IN_DELETE event is logged to ``base.MQ`` as a warning and
		bumps the counter in ``base.ALARM_DICT['sec'][1]``. Every other event
		type is ignored.
		"""
		temp_suffixes = ('.swp', '.swx', '.swpx', '~', '.swo')
		if event.name.endswith(temp_suffixes) or 'prelink' in event.name:
			return

		kind = event.maskname
		if kind == 'IN_MODIFY':
			report = '%s [WARNING] %s Modified! EVENT NAME:%s'%(base.TIME(),event.pathname,event.name)
		elif kind == 'IN_DELETE':
			report = '%s [WARNING] %s Deleted!'%(base.TIME(),event.pathname)
		else:
			return
		base.MQ.put(report)
		base.ALARM_DICT['sec'][1] += 1
Ejemplo n.º 4
0
def main(conf):
    """Start one monitor thread per config section and supervise them.

    For every section name returned by ``conf.get_sections()`` the
    module-level function ``thread_<section>`` is started as a daemon thread
    named ``monitor_<section>`` with ``conf`` as its only argument.

    The calling thread then wakes up every 60 seconds, logs the thread
    counts, and flags ``base.ALARM_DICT['proc']`` when fewer threads are
    alive than were started (the ``+ 1`` presumably accounts for the main
    thread). The loop ends on ``KeyboardInterrupt``.

    Raises:
        NameError: if a section has no matching ``thread_<section>``
            function in this module.
    """
    threads = []

    for section in conf.get_sections():
        func_name = 'thread_%s' % section
        # Look the function up by name instead of eval(): a section name is
        # configuration data and must never be executed as an expression.
        try:
            target = globals()[func_name]
        except KeyError:
            raise NameError("name '%s' is not defined" % func_name)
        worker = threading.Thread(name='monitor_%s' % section,
                                  target=target,
                                  args=(conf, ))
        worker.daemon = True
        threads.append(worker)
    for worker in threads:
        worker.start()
    while True:
        try:
            base.time.sleep(60)
            thread_acount = threading.activeCount()
            base.MQ.put(
                "%s [DEBUG] 总线程数:%s[%s],当前线程数:%s" %
                (base.TIME(), len(threads) + 1, threads, thread_acount))
            if len(threads) + 1 > thread_acount:
                # Prefix the alarm once; do not stack it on every pass.
                if "monitor_agent进程异常" not in base.ALARM_DICT['proc']:
                    base.ALARM_DICT[
                        'proc'] = 'monitor_agent进程异常!%s' % base.ALARM_DICT[
                            'proc']
        except KeyboardInterrupt:
            print("exit...")
            break
Ejemplo n.º 5
0
def Alert(url, msg):
    """POST *msg* to the alarm API at *url*.

    Returns False after logging a [CRIT] entry to ``base.MQ`` when the
    request raises IOError. In this snippet the success path falls off the
    end of the function, so it returns None.
    """
    try:
        req = urllib.urlopen(url, data=msg)
    except IOError as err:
        details = (base.TIME(), err, url, msg)
        base.MQ.put(
            "%s [CRIT] 提交报警任务出错,请确认url地址可访问!:%s url=%s msg: %s" % details)
        return False
Ejemplo n.º 6
0
def thread_proc(conf):
    """Entry point of the process monitor: load the 'proc' config section.

    Prints a start-up banner and reads the section with ``conf.get('proc')``.
    If that raises, the error is queued on ``base.MQ``, the function waits
    one second and then calls ``sys.exit(1)``.
    """
    print("thread_proc running...")
    try:
        proc_list = conf.get('proc')
    except Exception as err:
        failure = "%s [ERROR] 配置文件错误,monitor_proc线程退出!%s" % (base.TIME(), err)
        base.MQ.put(failure)
        base.time.sleep(1)
        sys.exit(1)
Ejemplo n.º 7
0
 def get(self, *args):
     """Read a whole section or a single option from ``self.config``.

     ``get(section)`` returns ``self.config.items(section)``;
     ``get(section, option)`` returns ``self.config.get(section, option)``.
     Any other number of arguments returns None. A missing section or
     option is logged to ``base.MQ`` and also yields None.
     """
     argc = len(args)
     try:
         if argc == 1:
             (section, ) = args
             return self.config.items(section)
         if argc == 2:
             section, option = args
             return self.config.get(section, option)
     except (ConfigParser.NoSectionError, ConfigParser.NoOptionError) as err:
         base.MQ.put("%s [ERROR] 配置文件读取错误:%s" % (base.TIME(), err))
Ejemplo n.º 8
0
	def __init__(self,access_ip_list):
		"""Prepare the login watcher for /var/log/secure.

		Args:
			access_ip_list: IPs that are allowed to log in; stored as-is.

		When the log exists it is opened and positioned at its current end.
		Otherwise ``self.f`` is None, ``self.f_pos`` is 0 and an error is
		queued on ``base.MQ``.
		"""
		self.access_ip_list = access_ip_list
		# Raw string so the backslashes reach the regex engine untouched.
		# {1,3} rather than {0,3}: every octet needs at least one digit,
		# otherwise bare dots such as "..." are matched as an address.
		self.pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
		if os.path.exists('/var/log/secure'):
			self.f = open('/var/log/secure')
			# Remember the current size and skip everything already logged.
			self.f_pos = os.path.getsize('/var/log/secure')
			self.f.seek(self.f_pos)
		else:
			self.f = None
			self.f_pos = 0
			base.MQ.put("%s [ERROR] 系统日志文件:/var/log/secure 丢失!"%base.TIME())
Ejemplo n.º 9
0
def thread_system(conf):
    """Entry point of the system monitor: load thresholds from [system].

    Reads the options cpu, cpu_core, mem, disk, inode, load and net, in that
    order, with ``conf.get('system', <option>)``. If any read raises, the
    error is queued on ``base.MQ``, the function waits one second and then
    calls ``sys.exit(1)``.
    """
    print("thread_system running...")
    option_names = ('cpu', 'cpu_core', 'mem', 'disk', 'inode', 'load', 'net')
    try:
        (threshold_cpu, threshold_cpu_core, threshold_mem, threshold_disk,
         threshold_inode, threshold_load,
         threshold_net) = [conf.get('system', name) for name in option_names]
    except Exception as err:
        failure = "%s [ERROR] 配置文件错误,monitor_system线程退出!%s" % (base.TIME(),
                                                                  err)
        base.MQ.put(failure)
        base.time.sleep(1)
        sys.exit(1)
Ejemplo n.º 10
0
def thread_mysql(conf):
    """Entry point of the MySQL monitor: load settings from [mysql].

    Reads the options mysql, sock, errlog, pattern, role, user, passwd and
    rep_delay_time, in that order. If any read raises, the error is printed
    and queued on ``base.MQ``, and the current process is sent signal 9; the
    trailing ``sys.exit(1)`` is kept as a fallback.
    """
    print("thread_mysql running...")
    option_names = ('mysql', 'sock', 'errlog', 'pattern', 'role', 'user',
                    'passwd', 'rep_delay_time')
    try:
        (mysqlbin, sock, errlog, pattern, role, user, passwd,
         rep_delay_time) = [conf.get('mysql', name) for name in option_names]
    except Exception as err:
        print('monitor thread-mysql error! %s' % err)
        failure = "%s [ERROR] monitor_mysql线程配置错误!%s" % (base.TIME(), err)
        base.MQ.put(failure)
        print('thread-mysql exit...')
        os.kill(os.getpid(), 9)
        sys.exit(1)
Ejemplo n.º 11
0
def thread_sec(conf):
    """Entry point of the security monitor: load settings from [sec].

    Builds the event mask (modify + delete) and reads four colon-separated
    options: md_sys_path and md_app_path (joined into one path list),
    md_file, and access_ip_list. If reading or splitting any of them raises,
    the error is queued on ``base.MQ``, the function waits one second and
    then calls ``sys.exit(1)``.
    """
    print("thread_sec running...")
    # Events to watch: file modification and file deletion.
    mask = sec.IN_MODIFY | sec.IN_DELETE

    try:
        sys_paths = conf.get('sec', 'md_sys_path').split(':')
        app_paths = conf.get('sec', 'md_app_path').split(':')
        path = sys_paths + app_paths

        m_files = conf.get('sec', 'md_file').split(':')
        access_ip_list = conf.get('sec', 'access_ip_list').split(":")
    except Exception as err:
        failure = "%s [ERROR] 配置文件错误,monitor_sec线程退出!%s" % (base.TIME(), err)
        base.MQ.put(failure)
        base.time.sleep(1)
        sys.exit(1)
Ejemplo n.º 12
0
def CheckProc(proc_list):
    # Check that every configured process is running and record the result
    # in base.ALARM_DICT['proc'].
    #
    # proc_list is iterated as pairs: i[0] is the entry name and i[1] is a
    # colon-separated string "exe:port:reload_command".
    #   * With a port: the PID listening on that port (via lsof) must have
    #     /proc/<pid>/exe pointing at exe.
    #   * Without a port: some PID found by "ps | grep <name>" must have
    #     /proc/<pid>/exe pointing at exe.
    # A process that fails the check is logged, added to `count`, and
    # restarted through the shell when a reload command is configured.
    #
    # NOTE(review): the outer `try:` has no except/finally clause within
    # this snippet -- the handler must follow in the full source.
    # NOTE(review): proc_port and i[0] are interpolated unquoted into shell
    # command lines -- confirm they only ever come from trusted config.
    count = []
    try:
        for i in proc_list:
            proc_attr = i[1].split(':')
            try:
                proc_exe = proc_attr[0].strip()
                proc_port = proc_attr[1].strip()
                # Command used to restart the process automatically after it exits.
                proc_reload = proc_attr[2].strip()
            except Exception, e:
                # Fewer than three fields: report the malformed entry and skip it.
                base.MQ.put('%s [ERROR] %s 进程监控配置格式出错:%s' %
                            (base.TIME(), i[0], e))
                continue
            if proc_port:
                # First PID reported by lsof as LISTENing on the IPv4 port.
                pid = os.popen(
                    "lsof -i4:%s|grep LISTEN|sed -n '1p'|awk '{print $2}'" %
                    proc_port).read().strip()
                # An empty pid yields '/proc//exe', which does not exist, so
                # "nothing is listening" falls through to the else branch.
                if os.path.exists(
                        '/proc/%s/exe' % pid) and (proc_exe == os.readlink(
                            '/proc/%s/exe' % pid)):
                    continue
                else:
                    # Debug aid (disabled): print "process exited: %s" % i[0]
                    base.MQ.put('%s [WARNING] %s 进程退出!' % (base.TIME(), i[0]))
                    count.append(i[0])
                    if proc_reload:
                        subprocess.Popen(proc_reload, shell=True)
                        base.MQ.put('%s [INFO] %s:执行进程自动重启!' %
                                    (base.TIME(), i[0]))
            else:
                # No port configured: collect every PID whose command line
                # matches the entry name.
                pids = os.popen("ps -eo pid,cmd|grep %s|awk '{print $1}'" %
                                i[0]).read().strip().split(os.linesep)
                base.MQ.put("%s [DEBUG] 进程PIDS:%s,%s,%s" %
                            (base.TIME(), pids, i[0], proc_exe))
                # Executable paths of the candidates that still exist.
                exes = [
                    os.readlink('/proc/%s/exe' % pid) for pid in pids
                    if os.path.exists('/proc/%s/exe' % pid)
                ]
                if proc_exe not in exes:
                    base.MQ.put('%s [WARNING] %s 进程退出!' % (base.TIME(), i[0]))
                    count.append(i[0])
                    if proc_reload:
                        subprocess.Popen(proc_reload, shell=True)
                        base.MQ.put('%s [INFO] %s:执行进程自动重启!' %
                                    (base.TIME(), i[0]))
                    continue
        # Publish the names of all exited processes, or clear the alarm.
        if count:
            base.ALARM_DICT['proc'] = '%s 进程退出!' % (','.join(count))
        else:
            base.ALARM_DICT['proc'] = ''
Ejemplo n.º 13
0
def thread_base(conf):
    '''Entry point of the base thread: load settings from [base].

    According to the original description, this thread writes the log and
    submits messages to the alarm interface. Log line format:

        xxxx-xx-xx xx:xx:xx [TAG] message
        -------------------   |
            |                 +---- [DEBUG]    debugging information
            |___ date/time    |____ [INFO]     system running status
                              |____ [WARNING]  an anomaly was detected
                              |____ [ERROR]    general program error
                              |____ [CRITICAL] severe error, e.g. an alarm
                                               could not be submitted

    The part shown here reads the options logfile, apiurl and log_level.
    If any read raises, the error is printed and ``sys.exit(1)`` is called.
    '''

    print("thread_base running...")

    try:
        log = conf.get('base', 'logfile')
        apiurl = conf.get('base', 'apiurl')
        log_level = conf.get('base', 'log_level')
    except Exception as err:
        print("%s [ERROR] 配置文件错误,monitor_base线程退出!%s" % (base.TIME(), err))
        sys.exit(1)
Ejemplo n.º 14
0
                    # Debug aid (disabled): print "process exited: %s" % i[0]
                    # The check failed: log it, remember the entry name, and
                    # run the reload command (if one is set) through the shell.
                    base.MQ.put('%s [WARNING] %s 进程退出!' % (base.TIME(), i[0]))
                    count.append(i[0])
                    if proc_reload:
                        subprocess.Popen(proc_reload, shell=True)
                        base.MQ.put('%s [INFO] %s:执行进程自动重启!' %
                                    (base.TIME(), i[0]))
            else:
                # Presumably the branch for entries without a port (the
                # matching `if` is above this fragment): collect every PID
                # whose command line matches the entry name.
                # NOTE(review): i[0] is interpolated unquoted into a shell
                # command -- confirm it only ever comes from trusted config.
                pids = os.popen("ps -eo pid,cmd|grep %s|awk '{print $1}'" %
                                i[0]).read().strip().split(os.linesep)
                base.MQ.put("%s [DEBUG] 进程PIDS:%s,%s,%s" %
                            (base.TIME(), pids, i[0], proc_exe))
                # Executable paths of the candidates that still exist.
                exes = [
                    os.readlink('/proc/%s/exe' % pid) for pid in pids
                    if os.path.exists('/proc/%s/exe' % pid)
                ]
                if proc_exe not in exes:
                    base.MQ.put('%s [WARNING] %s 进程退出!' % (base.TIME(), i[0]))
                    count.append(i[0])
                    if proc_reload:
                        subprocess.Popen(proc_reload, shell=True)
                        base.MQ.put('%s [INFO] %s:执行进程自动重启!' %
                                    (base.TIME(), i[0]))
                    continue
        # Publish the names of all exited processes, or clear the alarm.
        if count:
            base.ALARM_DICT['proc'] = '%s 进程退出!' % (','.join(count))
        else:
            base.ALARM_DICT['proc'] = ''
    except Exception, e:
        # Any unexpected failure is logged; nothing is re-raised here.
        base.MQ.put('%s [ERROR] 进程监控程序出错:%s' % (base.TIME(), e))
Ejemplo n.º 15
0
	def hash_file(self,f):
		"""Return the hex MD5 digest of the file at path *f*.

		The file is read in binary mode and in fixed-size chunks, so the
		digest covers the exact bytes on disk and large files are not loaded
		into memory at once. The handle is always closed.

		Returns:
			str: the hexadecimal digest, or None (after queuing an error on
			``base.MQ``) when *f* does not exist.
		"""
		if not os.path.exists(f):
			base.MQ.put("%s [ERROR] 监控文件不存在:%s"%(base.TIME(),f))
			return None
		digest = hashlib.md5()
		with open(f, 'rb') as fh:
			for chunk in iter(lambda: fh.read(65536), b''):
				digest.update(chunk)
		return digest.hexdigest()
Ejemplo n.º 16
0
    return socket.inet_ntoa(
        fcntl.ioctl(s.fileno(), 0x8915, struct.pack('256s',
                                                    interface[:15]))[20:24])


def Alert(url, msg):
    """POST *msg* to the alarm API at *url* and report whether it was accepted.

    The response body must be JSON. The submission counts as accepted when
    the HTTP status starts with '2' and either

    * ``ret['content']`` is one of "server status ok", "success" or
      "auto repaired", or
    * ``ret['content']['message']`` equals 'alarm is alarming'.

    Any other outcome -- request failure, non-JSON body, non-2xx status, or
    a JSON document without the expected fields -- is logged to ``base.MQ``
    as [CRIT] and reported as a failure instead of raising.

    Returns:
        bool: True when the alarm was accepted, False otherwise.
    """
    try:
        req = urllib.urlopen(url, data=msg)
    except IOError as e:
        base.MQ.put("%s [CRIT] 提交报警任务出错,请确认url地址可访问!:%s url=%s msg: %s" %
                    (base.TIME(), e, url, msg))
        return False
    try:
        try:
            ret = json.loads(req.read())
        except ValueError as e:
            base.MQ.put('%s [CRIT] 报警API接口返回值异常,可能非json格式:%s' %
                        (base.TIME(), e))
            return False
        base.MQ.put('%s [DEBUG] 提交报警API URL地址json结果:%s' % (base.TIME(), ret))
        httpcode = str(req.getcode())
    finally:
        # Release the connection on every path.
        req.close()

    if not httpcode.startswith('2'):
        base.MQ.put("%s [CRIT] 提交报警任务失败!POST返回状态码:%s url=%s msg: %s" %
                    (base.TIME(), httpcode, url, msg))
        return False

    # The reply may not have the expected shape (no 'content' key, or a
    # 'content' string outside the known set); treat that as a failed
    # submission rather than letting KeyError/TypeError escape.
    content = ret.get('content') if isinstance(ret, dict) else None
    if content in ("server status ok", "success", "auto repaired"):
        return True
    message = content.get('message') if isinstance(content, dict) else None
    if message == 'alarm is alarming':
        return True
    base.MQ.put("%s [CRIT] 提交报警任务失败! url=%s msg: %s ret:%s" %
                (base.TIME(), url, msg, ret))
    return False