Ejemplo n.º 1
0
 def isAlive(self):
     """Report whether ``self.getStatus()`` can be called successfully.

     Returns:
         The string 'true' when ``self.getStatus()`` does not raise, or the
         string "false" (after logging the error) when it does. Note that
         the result is a string, not a bool.
     """
     try:
         self.getStatus()
     except Exception as e:
         AgentLog.error('Mysql.isAlive failed, %s' % e)
         return "false"
     return 'true'
Ejemplo n.º 2
0
 def check(self):
     """
     Make sure no other agent process is running before this one starts.

     Detection works as follows:
      1. If the pid file does not exist, no agent is running and the new
         process may start (the method simply returns).
      2. If the pid file exists, the pid stored in it is read and passed to
         ``self.isAlive``. If that process is alive, the new process must
         not run and exits. If it is not alive, the previous agent probably
         exited abnormally (e.g. CTRL + C) and left a stale pid file; the
         stale file is removed and the new process may start.

     If the pid file cannot be read or parsed, or the stale file cannot be
     removed, execution falls through to the "agent exist" branch and the
     process exits with status 1.
     """
     if not os.path.isfile(self.__pidFile__):
         return
     try:
         # ``with`` closes the handle even when read() raises.
         with open(self.__pidFile__, 'rt') as pidFile:
             data = pidFile.read()
         # pid recorded in the file
         pid = int(data)
         if not self.isAlive(pid):
             try:
                 os.remove(self.__pidFile__)
                 return
             except Exception:
                 AgentLog.warning("remove pid file {0} failed.".format(
                     self.__pidFile__))
     except Exception:
         # Unreadable or malformed pid file: handled like a running agent.
         pass
     AgentLog.info("agent exist, only one process allow running")
     sys.exit(1)
Ejemplo n.º 3
0
    def doWork(self):
        """Merge the data of every monitor into one request and send it.

        Each monitor in ``self.montorInstances`` is asked for its data:
          * ``None`` aborts the whole round (nothing is sent);
          * an empty result is skipped with a warning;
          * anything else is merged into the request content.

        The merged content is passed through every filter in
        ``self.filters``. The very first round is not sent: it only clears
        ``self.isFirst``.
        """
        content = {}
        for name, monitor in self.montorInstances.items():
            data = monitor.getData()
            if data is None:
                AgentLog.warning(
                    "MonitorSenderThread.doWork can not get the %s Info,size is 0, now skip this time"
                    % name)
                return
            if len(data) == 0:
                AgentLog.warning(
                    "MonitorSenderThread.doWork can not get the %s Info,size is 0"
                    % name)
                continue
            # update() also accepts non-string keys, unlike dict(a, **b).
            content.update(data)
        request = AgentRequest(content=content)
        request.setId(self.instanceId, self.groupId, self.clusterId)

        for contentFilter in self.filters:
            request.setContent(contentFilter.filter(request.getContent()))

        if self.isFirst:
            self.isFirst = False
        else:
            self.sender.send(request.getRequest())
Ejemplo n.º 4
0
 def getSocket(self):
     """Return the socket path read from ``self.configFile``.

     The value is looked up with ``self.getValue`` under the key "socket".
     When that lookup yields an empty string a warning is logged and
     "/tmp/mysql.sock" is returned instead.
     """
     configured = self.getValue(self.configFile, "socket")
     if configured != "":
         return configured
     AgentLog.warning(
         'Mysql can not get socket value from config, use /tmp/mysql.sock'
     )
     return "/tmp/mysql.sock"
Ejemplo n.º 5
0
 def getDataDir(self):
     """Return the data directory read from ``self.configFile``.

     The key "datadir" is tried first. When it yields an empty string a
     warning is logged and the value of "innodb_data_home_dir" is returned
     instead (which may itself be empty).
     """
     configured = self.getValue(self.configFile, "datadir")
     if configured != "":
         return configured
     AgentLog.warning(
         'Mysql can not get datadir value from config, start get innodb_data_home_dir'
     )
     return self.getValue(self.configFile, "innodb_data_home_dir")
Ejemplo n.º 6
0
 def reconnect(self):
     """Re-establish the connection, then re-declare and re-bind the queue.

     For an exclusive receiver a fresh exclusive queue is declared and its
     name is stored in ``self.queue``; otherwise the existing queue name is
     declared again. In both cases the queue is bound to ``self.exchange``
     with ``self.routing_key``.
     """
     AgentLog.warning("receiver %s disconnectted, re-connect..." % self.name)
     RabbitMQWrapper.reconnect(self)
     boundQueue = self.queue
     if not self.exclusive:
         self.channel.queue_declare(queue=boundQueue)
     else:
         declareOk = self.channel.queue_declare(exclusive=True)
         boundQueue = declareOk.method.queue
         self.queue = boundQueue
     self.channel.queue_bind(
         exchange=self.exchange,
         queue=boundQueue,
         routing_key=self.routing_key)
Ejemplo n.º 7
0
    def doWork(self):
        """Validate the command and run ``self.handle()``.

        Returns:
            ``self.responseFailed(errMsg)`` when ``self.check()`` reports a
            failure, or ``self.responseFailed(...)`` built from
            ``self.errorHandler`` when ``self.handle()`` raises.

        NOTE(review): on success the result of ``self.handle()`` is not
        returned (the method returns None) - confirm whether a success
        response is expected here.
        """
        isSuccess, errMsg = self.check()

        if not isSuccess:
            return self.responseFailed(errMsg)
        try:
            ret = self.handle()
        except Exception as e:
            AgentLog.error('MysqlShell.doWork error :%s' % e)
            ret = self.errorHandler("%s" % e)
            return self.responseFailed(ret)
Ejemplo n.º 8
0
 def __init__(self,
              name_="RabbitMQSender",
              host_="localhost",
              port_=5672,
              exchange_='exchange',
              routing_key_='key'):
     """Initialise the wrapper base class and log the sender settings.

     The base initialiser receives an empty string in its fifth positional
     slot; every other argument is forwarded unchanged.
     """
     emptySlot = ''
     RabbitMQWrapper.__init__(
         self, name_, host_, port_, exchange_, emptySlot, routing_key_)
     settings = (name_, host_, port_, exchange_, routing_key_)
     AgentLog.info(
         "create sender %s {host:%s, port:%d, exchange:%s, routing_key:%s}"
         % settings)
Ejemplo n.º 9
0
 def stop(self):
     """
     Called when the program exits: delete the pid file.

     Raises:
         AgentException: when the pid file cannot be removed.
     """
     AgentLog.info("RDS Agent start to exit")
     try:
         os.remove(self.__pidFile__)
     except Exception:
         # ``except Exception`` (not a bare except) so KeyboardInterrupt and
         # SystemExit are not converted into AgentException.
         message = "remove pid file {0} error".format(self.__pidFile__)
         AgentLog.warning(message)
         raise AgentException(message)
Ejemplo n.º 10
0
 def __init__(self, attr):
     """Initialise the Mysql object from the ``attr`` mapping.

     Reads the 'home', 'config' and 'backupdir' entries of ``attr`` and
     derives the socket, data directory and binlog name through
     ``getSocket``, ``getDataDir`` and ``getBinlogName``.

     Raises:
         AgentAtrrException: when a required key is missing from ``attr``
             or one of the lookups raises AgentAtrrException.
     """
     AgentLog.info('Start to init Mysql from config')
     Database.__init__(self, attr)
     try:
         self.installPath = attr['home']
         self.configFile = attr['config']
         self.backupDir = attr['backupdir']
         self.socket = self.getSocket()
         self.dataDir = self.getDataDir()
         self.binlogName = self.getBinlogName()
     except (KeyError, AgentAtrrException) as e:
         raise AgentAtrrException('Mysql init KeyError:%s' % e)
Ejemplo n.º 11
0
 def doWork(self):
     """Validate the command and run a database backup.

     Returns ``self.responseFailed(errMsg)`` when ``self.check()`` fails.
     Otherwise the response timestamp ('%Y-%m-%d %H:%M:%S') is converted
     to epoch seconds and passed to ``self.db.backup``; any exception from
     the backup is logged and recorded in ``isSuccess`` / ``errMsg``.

     NOTE(review): the backup results are not used or returned after the
     try block in the visible code - confirm nothing is missing here.
     """
     isSuccess, errMsg = self.check()
     if not isSuccess:
         return self.responseFailed(errMsg)
     timestamp = self.response.getTimeStamp()
     timestamp = time.mktime(time.strptime(timestamp, '%Y-%m-%d %H:%M:%S'))
     try:
         isSuccess, masterFile, masterPos, errMsg, fileSize = self.db.backup(
             timestamp, self.backupType, self.binlogFile, self.backupTool)
     except Exception as e:
         isSuccess = False
         errMsg = 'Error %s' % e
         AgentLog.error(errMsg)
Ejemplo n.º 12
0
    def getInstanceInfo(self, instanceId):
        """Collect basic instance information through ``self.db.execSql``.

        Args:
            instanceId: accepted for interface compatibility; not used.

        Returns:
            A dict with the keys DBName, IP, Port (all None), version,
            schema (number of rows returned by ``show databases``), Uptime,
            Engine and NbCluster, or None when any query fails (the error
            is logged).
        """
        try:
            ret = {'DBName': None, 'IP': None, 'Port': None}
            ret['version'] = self.db.execSql('select version()')[0][0]
            ret['schema'] = len(self.db.execSql('show databases'))
            # SHOW STATUS rows are (Variable_name, Value): column 1 holds
            # the uptime, column 0 is just the literal name "Uptime".
            ret['Uptime'] = self.db.execSql('show status like "Uptime"')[0][1]
            ret['Engine'] = 'Innodb'
            ret['NbCluster'] = 'NO'
            return ret
        except Exception as e:
            AgentLog.error("AWR getInstanceInfo error:%s" % e)
            return None
Ejemplo n.º 13
0
 def start(self):
     """
     Write the pid of the current process to the pid file.

     Raises:
         AgentException: when the pid file cannot be opened or written.
     """
     AgentLog.info("RDS Agent start to run")
     try:
         # ``with`` closes the handle even when write() raises.
         with open(self.__pidFile__, 'wt') as pidFile:
             pidFile.write(str(os.getpid()))
     except Exception:
         AgentLog.error("open pid file {0} error, start failed".format(
             self.__pidFile__))
         raise AgentException("open pid file {0} failed".format(
             self.__pidFile__))
Ejemplo n.º 14
0
 def __init__(self, name_='RabbitMQReceiver', host_='localhost', port_=5672, exchange_='exchange', queue_='',
              routing_key_='key'):
     """Initialise the wrapper base class, then declare and bind the queue.

     With an empty ``queue_`` an exclusive queue is declared, its name is
     stored in ``self.queue`` and ``self.exclusive`` becomes True. With a
     non-empty ``queue_`` that queue is declared and ``self.exclusive``
     stays False. The queue is then bound to ``exchange_`` using
     ``routing_key_`` and ``self.doWork`` is reset to None.
     """
     RabbitMQWrapper.__init__(self, name_, host_, port_, exchange_, queue_, routing_key_)
     self.exclusive = False
     if queue_ != "":
         self.channel.queue_declare(queue=queue_)
     else:
         declareOk = self.channel.queue_declare(exclusive=True)
         queue_ = declareOk.method.queue
         self.queue = queue_
         self.exclusive = True
     self.channel.queue_bind(
         exchange=exchange_, queue=queue_, routing_key=routing_key_)
     self.doWork = None
     settings = (name_, host_, port_, exchange_, routing_key_)
     AgentLog.info(
         "create receiver %s {host:%s, port:%d, exchange:%s, routing_key:%s}" % settings)
Ejemplo n.º 15
0
 def __init__(self, threadName, activeReportInterval=5.0, loopInterval=60):
     """Set up the thread name, the timing settings and the wait event.

     Args:
         threadName: name handed to ``threading.Thread``.
         activeReportInterval: stored as the interval between status
             reports of the thread.
         loopInterval: stored as the interval between loop iterations.
     """
     threading.Thread.__init__(self, name=threadName)
     self.isRunning = True
     # Interval between two loop iterations of the thread.
     self.loopInterval = loopInterval
     # The thread reports its current status periodically;
     # activeReportInterval is the interval between two reports.
     self.activeReportInterval = activeReportInterval
     self.lastReportTime = 0
     # Most threads are looping threads that run once per interval and wait
     # through event.wait(). Compared with sleep, the wait can be cancelled
     # right away with set() when the thread exits, without waiting for the
     # sleep to finish.
     self.event = threading.Event()
     self.event.clear()
     createdMessage = "thread [{0}] created, report interval: {1}, loop interval: {2}".format(
         self.getName(), activeReportInterval, loopInterval)
     AgentLog.info(createdMessage)
Ejemplo n.º 16
0
class RabbitMQSender(RabbitMQWrapper):
    """RabbitMQWrapper subclass that publishes messages to an exchange."""

    def __init__(self,
                 name_="RabbitMQSender",
                 host_="localhost",
                 port_=5672,
                 exchange_='exchange',
                 routing_key_='key'):
        """Initialise the wrapper (with an empty fifth argument) and log the settings."""
        RabbitMQWrapper.__init__(self, name_, host_, port_, exchange_, '',
                                 routing_key_)
        AgentLog.info(
            "create sender %s {host:%s, port:%d, exchange:%s, routing_key:%s}"
            % (name_, host_, port_, exchange_, routing_key_))

    def send(self, msg):
        """Publish ``msg`` to ``self.exchange`` with ``self.routing_key``.

        On ConnectionClosed the connection is re-established and the
        publish is retried once; an error raised by that retry propagates
        to the caller. Any other exception from the first attempt is
        logged and swallowed.
        """
        try:
            self.channel.basic_publish(exchange=self.exchange,
                                       routing_key=self.routing_key,
                                       body=msg)
        except ConnectionClosed:
            self.reconnect()
            self.channel.basic_publish(exchange=self.exchange,
                                       routing_key=self.routing_key,
                                       body=msg)
        except Exception:
            AgentLog.error("{0} send msg:{1} error".format(self.name, msg))
Ejemplo n.º 17
0
 def upload_ftp(self, params, srcDir):
     """Validate the upload parameters and connect to the FTP server.

     Args:
         params: mapping that must provide 'remoteHost', 'remotePort',
             'remoteUser', 'remotePassword', 'backupPath' and
             'uploadLimit'.
         srcDir: source directory; not used in the visible part of the
             method.

     Returns:
         ``(False, message)`` when a parameter is missing or the connection
         fails.

     NOTE(review): after a successful connect the visible code falls off
     the end (returning None) - confirm nothing is missing here.
     """
     remoteHost = params.get('remoteHost', None)
     remotePort = params.get('remotePort', None)
     remoteUser = params.get('remoteUser', None)
     remotePasswd = params.get('remotePassword', None)
     backupPath = params.get('backupPath', None)
     uploadLimit = params.get('uploadLimit', None)
     required = (remoteHost, remotePort, remoteUser, remotePasswd,
                 backupPath, uploadLimit)
     if any(value is None for value in required):
         return False, 'remote host information errors'
     AgentLog.info("start upload backup data to ftp server")
     try:
         ftp = FTP()
         ftp.connect(remoteHost, int(remotePort), 60)
     except Exception as e:
         return False, 'can not connect to remote host: %s with port: %s, error: %s' % (
             remoteHost, remotePort, e)
Ejemplo n.º 18
0
    def setUp(self):
        """Initialise logging and install a database configuration.

        The configuration directory is derived from the current working
        directory, which must contain the Windows-style path segment
        ``IMAgent\\test`` (otherwise the ``[0]`` lookup raises IndexError).
        """
        # print(...) with a single string behaves the same on Python 2 and 3.
        print('init logging')
        nowPath = re.findall('(.*?)IMAgent\\\\test', os.getcwd())[0]
        self.configPath = nowPath + 'IMAgent\\test\\conf\\'
        AgentLog.init(self.configPath + 'logging.cnf')

        # Valid configuration
        AgentConfig.database = {
            'name': 'Mysql',
            'attr': {
                'user': '',
                'passwd': '',
                'home': '',
                'configFile': 'test.cnf',
                'backupDir': ''
            }
        }
Ejemplo n.º 19
0
    def parseMsg(self, response):
        """Build the command for ``response`` and queue it on the thread pool.

        The command class is looked up with ``CommandFactory.getCommand``
        from the response's DB type and action. When that lookup or the
        construction raises AttributeError, an error request
        ('command is not exist') is sent back and nothing is queued. Any
        other exception is logged and swallowed.
        """
        AgentLog.debug("receive msg: {0}".format(response.body))
        try:
            try:
                command = CommandFactory.getCommand(response.getDBType(), response.getAction())(self.context, response)
            except AttributeError:
                request = self.createErrorRequest(response, 'command is not exist')
                self.sender.send(request.getRequest())
                return
            # The command is executed asynchronously: doWork receives it,
            # together with the sender, as keyword arguments.
            data = [((), {'command': command, 'sender': self.sender})]
            for req in threadpool.makeRequests(doWork, data):
                SyncCMDReceiver.pool.putRequest(req)

        except Exception as e:
            AgentLog.error('syncCMDReceiver.parseMsg error %s' % e)
Ejemplo n.º 20
0
    def createMonitors(self, context, dbName, monitorList):
        """Import and instantiate every monitor named in ``monitorList``.

        For each name ``M`` the module ``imagent.<dbName>.monitor.M`` is
        imported and its attribute ``M`` is called with ``context``.

        Args:
            context: passed to every monitor constructor.
            dbName: package name below ``imagent``.
            monitorList: list of monitor names.

        Returns:
            A dict mapping each monitor name to its instance.

        Raises:
            AgentAtrrException: when ``monitorList`` is not a list, or an
                AttributeError is raised while a monitor is constructed.
            ImportError: when a module, or the monitor name inside it,
                cannot be imported.
        """
        if not isinstance(monitorList, list):
            raise AgentAtrrException(
                'MonitorManager.createMonitors invalid monitorList')
        AgentLog.info('start to create monitors :%s' % (','.join(monitorList)))

        # Imported locally so the block does not depend on the file header.
        import importlib

        try:
            monitorInstances = {}
            package = 'imagent.' + dbName + '.monitor.'
            for monitor in monitorList:
                # import_module + getattr replace exec/eval of a generated
                # "from ... import ..." statement: same lookup, but the
                # configured names are never executed as code.
                module = importlib.import_module(package + monitor)
                try:
                    monitorClass = getattr(module, monitor)
                except AttributeError:
                    # "from m import missing" raises ImportError; keep that.
                    raise ImportError('cannot import name %s' % monitor)
                monitorInstances[monitor] = monitorClass(context)
            return monitorInstances
        except AttributeError as e:
            raise AgentAtrrException(
                'MonitorManager.createMonitors can not get monitor :%s' % e)
Ejemplo n.º 21
0
    def doWork(self):
        """Refresh ``self.dataDict`` with the current top-SQL statistics.

        For every ordering column the top ``self.topNum`` rows of
        ``performance_schema.events_statements_summary_by_digest`` are
        queried; rows are merged by schema + digest, keeping the first row
        seen for each statement. The result maps
        ``"<schema>|<digest>|<field>"`` to the field value, where a None
        schema is written as "null". When no row is returned a warning is
        logged and ``self.dataDict`` is left untouched.

        Exceptions raised by the queries propagate to the caller.
        """
        orderColumns = [
            'COUNT_STAR', 'SUM_TIMER_WAIT', 'MIN_TIMER_WAIT',
            'AVG_TIMER_WAIT', 'MAX_TIMER_WAIT', 'SUM_LOCK_TIME',
            'SUM_ERRORS', 'SUM_WARNINGS', 'SUM_ROWS_AFFECTED',
            'SUM_ROWS_SENT', 'SUM_ROWS_EXAMINED',
            'SUM_CREATED_TMP_DISK_TABLES', 'SUM_CREATED_TMP_TABLES',
            'SUM_SELECT_FULL_JOIN', 'SUM_SELECT_FULL_RANGE_JOIN',
            'SUM_SELECT_RANGE', 'SUM_SELECT_RANGE_CHECK',
            'SUM_SELECT_SCAN', 'SUM_SORT_MERGE_PASSES', 'SUM_SORT_RANGE',
            'SUM_SORT_ROWS', 'SUM_SORT_SCAN', 'SUM_NO_INDEX_USED',
            'SUM_NO_GOOD_INDEX_USED'
        ]
        # The parts of the statement that do not depend on the column.
        selectPrefix = 'select SCHEMA_NAME,DIGEST,' + ','.join(self.fields) + \
                       ' from performance_schema.events_statements_summary_by_digest order by '
        limitSuffix = ' desc limit ' + str(self.topNum)

        # Query the top-N rows for each column, then merge them.
        result = {}
        for item in orderColumns:
            rows = self.dataBaseInstance.execSql(selectPrefix + item + limitSuffix)
            for row in rows:
                rowKey = str(row[0]) + row[1]
                if rowKey not in result:
                    result[rowKey] = row

        if not result:
            AgentLog.warning(
                'TopSQLMonitor.doWork can not get topsql info')
            return

        temp = {}
        for line in result.values():
            # Some rows have a None schema; substitute 'null' so the string
            # concatenation below works.
            schema = 'null' if line[0] is None else line[0]
            statusBase = schema + "|" + line[1]
            for index, val in enumerate(line[2:]):
                temp[statusBase + '|' + self.fields[index]] = val

        self.lock.acquire()
        try:
            self.dataDict = temp
        finally:
            self.lock.release()
Ejemplo n.º 22
0
    def getCacheProfile(self, instanceId):
        """Collect InnoDB cache figures through ``self.db.execSql``.

        Args:
            instanceId: accepted for interface compatibility; not used.

        Returns:
            A dict with
              * PoolSize: ``innodb_buffer_pool_size`` divided by 1024 twice
                (integer division);
              * RedologSize: ``innodb_log_file_size`` divided the same way;
              * Hint: Innodb_buffer_pool_reads / Innodb_data_reads as a
                float, or 0.0 when Innodb_data_reads is zero or missing;
            or None when a query or conversion fails (the error is logged).
        """
        try:
            ret = {}

            # ``//`` keeps the integer result on Python 2 and 3 alike.
            ret['PoolSize'] = int(self.db.execSql("show variables like 'innodb_buffer_pool_size'")[0][1]) // 1024 // 1024

            ret['RedologSize'] = int(self.db.execSql("show variables like 'innodb_log_file_size'")[0][1]) // 1024 // 1024

            # Compute the hint ratio.
            answers = self.db.execSql("show status like 'innodb%_reads'")
            hint = {'Innodb_buffer_pool_reads': 0, 'Innodb_data_reads': 0}
            for line in answers:
                if line[0] in hint:
                    hint[line[0]] = line[1]
            dataReads = float(hint['Innodb_data_reads'])
            # Zero reads would otherwise raise ZeroDivisionError and discard
            # the whole profile.
            if dataReads:
                ret['Hint'] = float(hint['Innodb_buffer_pool_reads']) / dataReads
            else:
                ret['Hint'] = 0.0
            return ret

        except Exception as e:
            AgentLog.error("AWR getCacheProfile error:%s" % e)
            return None
Ejemplo n.º 23
0
    def doWork(self):
        """Build the AWR report for the configured time window.

        Returns:
            ``self.responseFailed(...)`` when ``self.check()`` fails, when
            the end time lies before the start time ('TimeError'), or when
            collecting the data raises; otherwise ``self.responseOK(...)``
            with the collected report under the key 'awr'.
        """
        isSuccess, errMsg = self.check()
        if not isSuccess:
            return self.responseFailed(errMsg)
        endTime = self.endTime
        startTime = self.startTime
        instanceID = self.instanceID

        try:
            # Instance parameters
            instanceData = self.getInstanceInfo(self.instanceID)

            # Time parameters (both stamps use '%Y-%m-%d %H:%M')
            elapsed = time.mktime(time.strptime(endTime, "%Y-%m-%d %H:%M")) - time.mktime(
                time.strptime(startTime, "%Y-%m-%d %H:%M"))
            if elapsed < 0:
                return self.responseFailed('TimeError')
            snapTime = {'BeginSnap': startTime, 'EndSnap': endTime, 'Elapsed': elapsed}

            # Global parameters
            showGlobal = self.db.execSql("show global variables")

            # SQL parameters
            sqlData = self.getSQL(instanceID)

            # Status parameters are not collected: 'loadProfile' is
            # reported as None below.

            cacheProfile = self.getCacheProfile(instanceID)
            waitEvent = self.getWaitEvent(instanceID)

            ret = {'awr': {'Instance': instanceData,
                           'loadProfile': None, 'SnapTime': snapTime, 'CacheProfile': cacheProfile,
                           'WaitEvent': waitEvent, 'showGlobal': showGlobal, 'sql': sqlData
                           }}

            return self.responseOK(ret)

        except Exception as e:
            errMsg = 'AWR get, error:%s' % (e)
            AgentLog.error(errMsg)
            # Report the failure like the other error paths instead of
            # returning None.
            return self.responseFailed(errMsg)
Ejemplo n.º 24
0
 def upload_scp(self, params, srcDir):
     """Copy ``srcDir`` to the remote host with ``scp -r``.

     Args:
         params: mapping that must provide 'remoteHost', 'remotePort',
             'remoteUser', 'remotePassword', 'backupPath' and
             'uploadLimit'. 'remotePassword' is required but not used in
             the scp command.
         srcDir: local directory to copy.

     Returns:
         ``(True, '')`` when the command prints nothing, otherwise
         ``(False, <joined output>)``; ``(False, 'remote host information
         errors')`` when a parameter is missing.

     NOTE(review): the command is assembled by string interpolation and
     handed to ``Util.popen``; if ``params`` can come from an untrusted
     source the values should be shell-quoted.
     """
     remoteHost = params.get('remoteHost', None)
     remotePort = params.get('remotePort', None)
     remoteUser = params.get('remoteUser', None)
     remotePasswd = params.get('remotePassword', None)
     backupPath = params.get('backupPath', None)
     uploadLimit = params.get('uploadLimit', None)
     required = (remoteHost, remotePort, remoteUser, remotePasswd,
                 backupPath, uploadLimit)
     if any(value is None for value in required):
         return False, 'remote host information errors'
     # int() exists on Python 2 and 3 (Python 2 promotes to long by itself).
     uploadLimit = int(uploadLimit)
     AgentLog.info("start upload backup data to remote server")
     if uploadLimit > 0:
         # The limit is multiplied by 8 before being passed to ``scp -l``.
         cmd = "scp -r -P %s -l %d %s %s@%s:%s" % (
             remotePort, uploadLimit * 8, srcDir, remoteUser, remoteHost,
             backupPath)
     else:
         cmd = "scp -r -P %s %s %s@%s:%s" % (
             remotePort, srcDir, remoteUser, remoteHost, backupPath)
     lines = Util.popen(cmd).readlines()
     if len(lines) != 0:
         return False, ' '.join(lines)
     return True, ''
Ejemplo n.º 25
0
def main():
    """Start the agent and keep it running until interrupted.

    The agent manager is stopped exactly once on the way out, whether the
    loop ends through KeyboardInterrupt or through another exception (which
    still propagates).
    """
    agentManager = None
    try:

        # Parse the command line: config file path and console flag.
        optParser = AgentOptionParser()
        cfgFile = Util.getRealPath(optParser.configFile)
        console = optParser.console

        # Load the configuration from the config file.
        userConfig, sysConfig = AgentConfigManager.initAllConfig(cfgFile)

        # In console mode run directly; otherwise run in the background as
        # a daemon.
        if not console:
            Daemon()
        else:
            userConfig.log['logSection'] = "debug"

        # Initialise the log handle; every logging call must come after
        # this point.
        AgentLog.init(userConfig.log['logConfig'],
                      userConfig.log['logSection'])

        agentManager = AgentManager(userConfig, sysConfig)

        agentManager.start()

        # Idle until the process is interrupted.
        while True:
            Util.sleep(5)

    except KeyboardInterrupt:
        # Regular shutdown request: the cleanup happens in ``finally`` so
        # stop() is not called twice.
        pass
    finally:
        if agentManager is not None:
            agentManager.stop()
Ejemplo n.º 26
0
class BaseThread(threading.Thread):
    """Thread that calls ``self.doWork()`` once per loop interval.

    ``doWork`` is not defined in this class; it is expected to be provided
    elsewhere (presumably by subclasses).
    """

    def __init__(self, threadName, activeReportInterval=5.0, loopInterval=60):
        """Store the timing settings and create the wait event.

        Args:
            threadName: name handed to ``threading.Thread``.
            activeReportInterval: seconds between two "is alive" log lines.
            loopInterval: timeout passed to ``event.wait`` between two
                ``doWork`` calls.
        """
        threading.Thread.__init__(self, name=threadName)
        self.isRunning = True
        # Interval between two loop iterations of the thread.
        self.loopInterval = loopInterval
        # The thread reports its current status periodically;
        # activeReportInterval is the interval between two reports.
        self.activeReportInterval = activeReportInterval
        self.lastReportTime = 0
        # Most threads are looping threads that run once per interval and
        # wait through event.wait(). Compared with sleep, the wait can be
        # cancelled right away with set() when the thread exits, without
        # waiting for the sleep to finish.
        self.event = threading.Event()
        self.event.clear()
        AgentLog.info("thread [{0}] created, report interval: {1}, loop interval: {2}".format(
            self.getName(), activeReportInterval, loopInterval))

    def run(self):
        """Loop while ``self.isRunning``: do the work, report, then wait.

        Exceptions raised by ``doWork`` are logged with their traceback and
        do not end the loop.
        """
        AgentLog.info('thread {0} start running'.format(self.getName()))
        while self.isRunning:
            try:
                self.doWork()
                if self.lastReportTime == 0:
                    self.lastReportTime = int(time.time())
                # Current time
                currentTime = int(time.time())
                # When the report interval has elapsed, log that the thread
                # is alive.
                if currentTime >= self.lastReportTime + self.activeReportInterval:
                    AgentLog.info(
                        ' thread:[{0}] is alive '.format(self.getName()).ljust(45, ' ').center(75, '#'))
                    self.lastReportTime = currentTime
            except Exception:
                AgentLog.error(
                    "thread [{0}] raise Exception: {1}".format(self.getName(), traceback.format_exc()))
            # Wait for the next run.
            self.event.wait(self.loopInterval)
        AgentLog.info('thread:{0} exit'.format(self.getName()))
Ejemplo n.º 27
0
 def receive(self, doWork, ack=False):
     """Consume messages from ``self.queue`` until told to stop.

     Args:
         doWork: stored in ``self.doWork``.
         ack: forwarded as the ``no_ack`` argument of ``basic_consume``.
             NOTE(review): the parameter name and ``no_ack`` read as
             opposites - confirm the intended meaning.

     The loop ends on KeyboardInterrupt, or on ConnectionClosed when
     ``self.normalExit`` is set. Every other error is logged and followed
     by a reconnect.
     """
     self.doWork = doWork
     while True:
         try:
             self.channel.basic_consume(self.callback, queue=self.queue, no_ack=ack)
             self.channel.start_consuming()
         except KeyboardInterrupt:
             AgentLog.warning("receive KeyboardInterrupt error")
             break
         except ConnectionClosed as e:
             AgentLog.warning("receive ConnectionClosed error:%s" % e)
             if self.normalExit:
                 break
             self.reconnect()
         except Exception as e:
             # Log first so the cause is recorded even if reconnect raises.
             AgentLog.warning("receive error: %s" % e)
             self.reconnect()
Ejemplo n.º 28
0
 def run(self):
     """Loop while ``self.isRunning``: do the work, report, then wait.

     Each iteration calls ``self.doWork()``, logs an "is alive" line once
     ``self.activeReportInterval`` seconds have passed since the last
     report, and then waits on ``self.event`` for up to
     ``self.loopInterval`` seconds. Exceptions raised by ``doWork`` are
     logged with their traceback and do not end the loop.
     """
     AgentLog.info('thread {0} start running'.format(self.getName()))
     while self.isRunning:
         try:
             self.doWork()
             if self.lastReportTime == 0:
                 self.lastReportTime = int(time.time())
             # Current time
             currentTime = int(time.time())
             # When the report interval has elapsed, log that the thread is
             # alive.
             if currentTime >= self.lastReportTime + self.activeReportInterval:
                 AgentLog.info(
                     ' thread:[{0}] is alive '.format(self.getName()).ljust(45, ' ').center(75, '#'))
                 self.lastReportTime = currentTime
         except Exception:
             AgentLog.error(
                 "thread [{0}] raise Exception: {1}".format(self.getName(), traceback.format_exc()))
         # Wait for the next run.
         self.event.wait(self.loopInterval)
Ejemplo n.º 29
0
class RabbitMQSender(RabbitMQWrapper):
    """RabbitMQWrapper subclass that publishes messages to an exchange."""

    def __init__(self,
                 name_="RabbitMQSender",
                 host_="localhost",
                 port_=5672,
                 exchange_='exchange',
                 routing_key_='key'):
        """Initialise the wrapper (with an empty fifth argument) and log the settings."""
        RabbitMQWrapper.__init__(self, name_, host_, port_, exchange_, '',
                                 routing_key_)
        AgentLog.info(
            "create sender %s {host:%s, port:%d, exchange:%s, routing_key:%s}"
            % (name_, host_, port_, exchange_, routing_key_))

    def send(self, msg):
        """Publish ``msg`` and write its first 128 characters to the debug log.

        On ConnectionClosed the connection is re-established and the
        publish is retried once; an error raised by that retry propagates
        to the caller. Any other exception from the first attempt is
        logged and swallowed. The debug line is written whenever the method
        does not raise, including after a swallowed error.
        """
        try:
            self.channel.basic_publish(exchange=self.exchange,
                                       routing_key=self.routing_key,
                                       body=msg)
        except ConnectionClosed:
            self.reconnect()
            self.channel.basic_publish(exchange=self.exchange,
                                       routing_key=self.routing_key,
                                       body=msg)
        except Exception:
            AgentLog.error("{0} send msg:{1} error".format(self.name, msg))
        # Slicing already returns the whole message when it is 128
        # characters or shorter, so no length check is needed.
        AgentLog.debug("{0} send msg: {1}".format(self.name, msg[:128]))
Ejemplo n.º 30
0
        if not Util.isExists(configFile):
            raise AgentFileException(
                'Mysql can not find config File from path :%s' % configFile)
        try:
            with open(configFile, 'r') as f:
                for line in f:
                    if line.find(key) != -1:
                        key = (line.split('=')[0]).strip()
                        if key[0] != '#':
                            value = (line.split('=')[1]).strip()
                            print value
                            return value
        except IOError, e:
            raise AgentFileException(
                'Mysql can not find config File from path :%s' % configFile)
        except Exception, e:
            AgentLog.warning(
                'Mysql can not get Value from config, key: %s,configFile:%s' %
                (key, configFile))
        return ""


class SlowLogFile(DBFile):
    """DBFile subclass whose ``getData`` is currently an empty stub."""

    def getData(self):
        """Do nothing and return None (not implemented yet)."""
        return None


class ErrorLogFile(DBFile):
    """DBFile subclass whose ``getData`` is currently an empty stub."""

    def getData(self):
        """Do nothing and return None (not implemented yet)."""
        return None