Ejemplo n.º 1
0
    def check_instance_state(self):
        """
        Check the state of every instance and report which ones have problems.

        Each instance is checked with ``self.check_instance`` and the outcome
        is handed to ``self.update_result``. Instances are also grouped by
        their ``service`` attribute so that the per-service checks can run
        afterwards: ``self.check_zk_leader`` for "zookeeper" and
        ``self.check_hbase_master`` for "hbase".

        Returns:
            The tuple ``(self.state, self.msg)``.
        """
        service_role_map = {}
        session = database.getSession()
        # try/finally guarantees the session is released even when a query
        # or one of the checks raises.
        try:
            for instance in session.query(Instance):
                result, msg = self.check_instance(instance)
                self.update_result(result, msg)
                # setdefault replaces the Python-2-only dict.has_key() test.
                service_role_map.setdefault(
                    instance.service, []).append(instance)

            for service, roles in service_role_map.items():
                if service == "zookeeper":
                    zk_leader_port = database.get_service_conf(
                        session, "zookeeper", "zookeeper_leader_port")
                    result, msg = self.check_zk_leader(zk_leader_port, roles)
                    self.update_result(result, msg)
                elif service == "hbase":
                    # TODO (left open by the original author)
                    hbase_master_info_port = database.get_service_conf(
                        session, "hbase", "hbase_master_info_port")
                    result, msg = self.check_hbase_master(
                        hbase_master_info_port, roles)
                    self.update_result(result, msg)
        finally:
            session.close()
        return (self.state, self.msg)
Ejemplo n.º 2
0
    def check_instance_state(self):
        """
        Check the state of every instance and report which ones have problems.

        Each instance is checked with ``self.check_instance``; when it returns
        a key word that is not ``None`` the key word and message are passed to
        ``self.update_result``. Instances are also grouped by their
        ``service`` attribute so that the per-service checks can run
        afterwards: ``self.check_zk_leader`` for "zookeeper" and
        ``self.check_hbase_master`` for "hbase".

        Returns:
            ``self.alarm_list``.
        """
        service_role_map = {}
        session = database.getSession()
        # try/finally guarantees the session is released even when a query
        # or one of the checks raises.
        try:
            for instance in session.query(Instance):
                # Check the individual instance.
                key_word, msg = self.check_instance(instance)
                if key_word is not None:
                    self.update_result(key_word, msg)
                # Group instances by service; setdefault replaces the
                # Python-2-only dict.has_key() test.
                service_role_map.setdefault(
                    instance.service, []).append(instance)

            for service, roles in service_role_map.items():
                if service == "zookeeper":
                    zk_leader_port = database.get_service_conf(
                        session, "zookeeper", "zookeeper_leader_port")
                    result, msg = self.check_zk_leader(zk_leader_port, roles)
                    if not result:
                        self.update_result("cluster(zk no leader)", msg)
                elif service == "hbase":
                    # TODO (left open by the original author)
                    hbase_master_info_port = database.get_service_conf(
                        session, "hbase", "hbase_master_info_port")
                    result, msg = self.check_hbase_master(
                        hbase_master_info_port, roles)
                    if not result:
                        self.update_result("cluster(hbase no leader)", msg)
        finally:
            session.close()
        return self.alarm_list
Ejemplo n.º 3
0
def get_cluster_name():
    """Return the "cluster_name" setting of the "ganglia" service.

    The value is read through ``database.get_service_conf`` using a session
    from ``database.getSession()``. The session is closed before returning,
    including when the lookup raises.
    """
    session = database.getSession()
    try:
        return database.get_service_conf(session, "ganglia", "cluster_name")
    finally:
        session.close()
Ejemplo n.º 4
0
 def namenode_web(self):
     """Run the namenode web check against every known namenode host.

     Reads the "dfs_namenode_http_address_port" setting of the "hdfs"
     service and the hosts of all instances whose role is "namenode", then
     delegates to ``namenode_web.namenode_web(hosts, port)`` and returns
     its result.
     """
     session = database.getSession()
     # try/finally releases the session even if a lookup or query raises.
     try:
         nm_web_port = database.get_service_conf(
             session, "hdfs", "dfs_namenode_http_address_port")
         namenode_query = session.query(Instance).filter(
             Instance.role == "namenode")
         nms = [inst.host for inst in namenode_query]
     finally:
         session.close()
     # The session is already closed here, as in the original ordering.
     return namenode_web.namenode_web(nms, nm_web_port)
Ejemplo n.º 5
0
    def init_config(self):
        """
        Initialise the host and port settings used by this object.

        By default the values are read from the database. Because a
        deployment may use only the monitoring part, this function can be
        edited to assign fixed values instead.

        Sets ``self.rmhost`` / ``self.hshost`` to the host of the last
        instance found with role "resourcemanager" / "historyserver" (each
        attribute is left untouched when no such instance exists), and
        ``self.rmport`` / ``self.hsport`` from the "yarn" service settings.
        """
        session = database.getSession()
        # try/finally releases the session even if a lookup or query raises.
        try:
            insts = session.query(Instance).filter(
                Instance.role == "resourcemanager")
            for inst in insts:
                self.rmhost = inst.host

            self.rmport = database.get_service_conf(
                session, "yarn", "yarn_rm_webapp_port")

            insts = session.query(Instance).filter(
                Instance.role == "historyserver")
            for inst in insts:
                self.hshost = inst.host

            self.hsport = database.get_service_conf(
                session, "yarn", "mapreduce_jobhistory_webapp_port")
        finally:
            session.close()
Ejemplo n.º 6
0
 def resourcemanager_web(self):
     """Run the resourcemanager web check against the single RM host.

     Reads the "yarn_rm_webapp_port" setting of the "yarn" service and the
     hosts of all instances whose role is "resourcemanager".

     Returns:
         ``(contants.ALARM_ERROR, message)`` when no resourcemanager or more
         than one resourcemanager is found; otherwise the result of
         ``resourcemanager_web.resourcemanager_web(host, port)``.
     """
     session = database.getSession()
     # try/finally releases the session even if a lookup or query raises.
     try:
         rm_port = database.get_service_conf(
             session, "yarn", "yarn_rm_webapp_port")
         rm_query = session.query(Instance).filter(
             Instance.role == "resourcemanager")
         rms = [inst.host for inst in rm_query]
     finally:
         session.close()

     if not rms:
         return (contants.ALARM_ERROR,
                 u"%s 检查不到有resourcemanager" % self.rule.name)
     if len(rms) > 1:
         return (contants.ALARM_ERROR,
                 u"%s 检查到有多个resourcemanager %s"
                 % (self.rule.name, ",".join(rms)))
     return resourcemanager_web.resourcemanager_web(rms[0], rm_port)
Ejemplo n.º 7
0
    def init_config(self):
        """
        Initialise the host and port settings used by this object.

        By default the values are read from the database. Because a
        deployment may use only the monitoring part, this function can be
        edited to assign fixed values instead.

        Sets ``self.rmhost`` / ``self.hshost`` to the host of the last
        instance found with role "resourcemanager" / "historyserver" (each
        attribute is left untouched when no such instance exists), and
        ``self.rmport`` / ``self.hsport`` from the "yarn" service settings.
        """
        session = database.getSession()
        # try/finally releases the session even if a lookup or query raises.
        try:
            rm_instances = session.query(Instance).filter(
                Instance.role == "resourcemanager")
            for inst in rm_instances:
                self.rmhost = inst.host

            self.rmport = database.get_service_conf(
                session, "yarn", "yarn_rm_webapp_port")

            hs_instances = session.query(Instance).filter(
                Instance.role == "historyserver")
            for inst in hs_instances:
                self.hshost = inst.host

            self.hsport = database.get_service_conf(
                session, "yarn", "mapreduce_jobhistory_webapp_port")
        finally:
            session.close()
Ejemplo n.º 8
0
    def check_instance_state(self):
        """
        Check the state of every instance and report which ones have problems.

        Each instance is checked with ``self.check_instance``; when it returns
        a key word that is not ``None`` the key word and message are passed to
        ``self.update_result``. Instances are also grouped by their
        ``service`` attribute so that the per-service checks can run
        afterwards: ``self.check_zk_leader`` for "zookeeper" and
        ``self.check_hbase_master`` for "hbase".

        Returns:
            ``self.alarm_list``.
        """
        service_role_map = {}
        session = database.getSession()
        # try/finally guarantees the session is released even when a query
        # or one of the checks raises.
        try:
            for instance in session.query(Instance):
                # Check the individual instance.
                key_word, msg = self.check_instance(instance)
                if key_word is not None:
                    self.update_result(key_word, msg)
                # Group instances by service; setdefault replaces the
                # Python-2-only dict.has_key() test.
                service_role_map.setdefault(
                    instance.service, []).append(instance)

            for service, roles in service_role_map.items():
                if service == "zookeeper":
                    zk_leader_port = database.get_service_conf(
                        session, "zookeeper", "zookeeper_leader_port")
                    result, msg = self.check_zk_leader(zk_leader_port, roles)
                    if not result:
                        self.update_result("cluster(zk no leader)", msg)
                elif service == "hbase":
                    # TODO (left open by the original author)
                    hbase_master_info_port = database.get_service_conf(
                        session, "hbase", "hbase_master_info_port")
                    result, msg = self.check_hbase_master(
                        hbase_master_info_port, roles)
                    if not result:
                        self.update_result("cluster(hbase no leader)", msg)
        finally:
            session.close()
        return self.alarm_list
Ejemplo n.º 9
0
 def resourcemanager_web(self):
     """Run the resourcemanager web check against the single RM host.

     Reads the "yarn_rm_webapp_port" setting of the "yarn" service and the
     hosts of all instances whose role is "resourcemanager".

     Returns:
         A one-element list holding a ``{"key_word": ..., "msg": ...}`` dict
         when no resourcemanager or more than one resourcemanager is found;
         otherwise the result of
         ``resourcemanager_web.resourcemanager_web(host, port)``.
     """
     session = database.getSession()
     # try/finally releases the session even if a lookup or query raises.
     try:
         rm_port = database.get_service_conf(
             session, "yarn", "yarn_rm_webapp_port")
         rm_query = session.query(Instance).filter(
             Instance.role == "resourcemanager")
         rms = [inst.host for inst in rm_query]
     finally:
         session.close()

     if not rms:
         key_word = "cluster(no rm)"
         msg = u"%s 检查不到有resourcemanager" % self.rule.name
         return [{"key_word": key_word, "msg": msg}]
     if len(rms) > 1:
         key_word = "cluster(too much rm)"
         msg = u"%s 检查到有多个resourcemanager %s" % (
             self.rule.name, ",".join(rms))
         return [{"key_word": key_word, "msg": msg}]
     return resourcemanager_web.resourcemanager_web(rms[0], rm_port)
Ejemplo n.º 10
0
def get_cluster_name():
    """Return the "cluster_name" setting of the "ganglia" service.

    The value is read through ``database.get_service_conf`` using a session
    from ``database.getSession()``. The session is closed before returning,
    including when the lookup raises.
    """
    session = database.getSession()
    try:
        return database.get_service_conf(session, "ganglia", "cluster_name")
    finally:
        session.close()