Ejemplo n.º 1
0
class Monitor():
    def __init__(self, app_base, install_path=None, debug=False):
        self.APP_BASE = app_base
        self.CONF_BASE = _CONF_BASE
        self.install_path = install_path
        self.pkgCom = PkgCommon()
        self.log_level = 'DEBUG' if debug else 'INFO'

    def run(self):
        if self.install_path == 'all':
            info_list = self.pkgCom.getPkgList()
        else:
            pkg_info = self.pkgCom.getPkgId(self.install_path)
            if pkg_info:
                info_list = {'pkg': [pkg_info]}
            else:
                return 1, "%s not a valid package" % (self.install_path)

        t_list = []
        #只处理包类型为pkg的包
        if 'pkg' in info_list:
            pkg_info_list = info_list['pkg']
        else:
            pkg_info_list = []
        for pkg_info in pkg_info_list:
            t_list.append(gevent.spawn(self.monitorPkg, pkg_info))
        gevent.joinall(t_list)
        return 0, "ok"

    def reportStatus(self,
                     pkg_id,
                     pkg_conf_path,
                     inst_info,
                     status,
                     err_info={}):
        reportInfo = {}
        reportInfo['dims'] = {}
        reportInfo['vals'] = {}
        reportInfo['dims']['process.process.package_id'] = pkg_id
        reportInfo['dims']['process.process.version_id'] = ""
        reportInfo['dims']['process.process.install_path'] = inst_info[
            'installPath']

        reportInfo['vals']['process.process.package_status'] = status
        if not err_info.get('err_proc') and not err_info.get('err_port'):
            reportInfo['vals']['process.process.alert_status'] = 0
        else:
            reportInfo['vals']['process.process.alert_status'] = 1

        conf_file = os.path.join(pkg_conf_path, 'package.conf.yaml')
        proc_config = common.getConfig(conf_file, "proc_list")
        port_config = common.getConfig(conf_file, "port_list")
        proc_list = {}
        for proc in proc_config:
            proc_list[proc['proc_name']] = proc

        normal_proc_list = []
        proc_num_list = self.pkgCom.getProcNum()
        normal_proc_str = ""
        for proc_name in err_info.get('ok_proc', []):
            num_min = proc_list[proc_name].get('proc_num_min', 0)
            num_max = proc_list[proc_name].get('proc_num_max', 0)
            proc_str = "%s:%s,%s|%s" % (proc_name, num_min, num_max,
                                        proc_num_list[proc_name])
            normal_proc_list.append(proc_str)
            normal_proc_str = '##'.join(normal_proc_list)
        abnormal_proc_list = []
        abnormal_proc_str = ""
        for proc_name in err_info.get('err_proc', []):
            num_min = proc_list[proc_name].get('proc_num_min', 0)
            num_max = proc_list[proc_name].get('proc_num_max', 0)
            proc_str = "%s:%s,%s|%s" % (proc_name, num_min, num_max,
                                        proc_num_list[proc_name])
            abnormal_proc_list.append(proc_str)
            abnormal_proc_str = '##'.join(abnormal_proc_list)

        normal_port_list = []
        normal_port_str = ""
        for port in err_info.get('ok_port', []):
            normal_port_list.append(port)
        normal_port_str = "##".join(map(str, normal_port_list))

        abnormal_port_list = []
        abnormal_port_str = ""
        for port in err_info.get('err_port', []):
            abnormal_port_list.append(port)
        abnormal_port_str = "##".join(map(str, abnormal_port_list))

        reportInfo['vals'][
            'process.process.normal_processes'] = normal_proc_str
        reportInfo['vals'][
            'process.process.abnormal_processes'] = abnormal_proc_str
        reportInfo['vals']['process.process.normal_ports'] = normal_port_str
        reportInfo['vals'][
            'process.process.abnormal_ports'] = abnormal_port_str
        if report:
            ret, msg = report(data_id=3000,
                              dims=reportInfo['dims'],
                              vals=reportInfo['vals'])

    @catch_except  # 确保不会相互影响
    def monitorPkg(self, pkg_info):
        import random
        gevent.sleep(random.randint(0, 15))

        pkg_id, pkg_conf_path, pkg_install_path = pkg_info[
            'packageId'], pkg_info['confPath'], pkg_info['installPath']
        # 初始化log配置
        st = time.time()
        mLog = self.pkgCom.configLog(pkg_install_path, 'monitor',
                                     self.log_level)
        mLog.info('check process start: %s' % pkg_install_path)
        # 读取实例信息
        inst_conf_file = os.path.join(pkg_conf_path, 'instance.conf.yaml')
        with open(inst_conf_file, 'r') as fp:
            conf_info = yaml.load(fp)
        inst_info = conf_info.pop()

        # 检查包是否已启动
        status_conf_file = os.path.join(pkg_conf_path, 'package.status')
        if not os.path.isfile(status_conf_file):
            return 0, 'ok'
        with open(status_conf_file, 'r') as fp:
            status_info = yaml.load(fp)
        start_status = status_info['status']

        if start_status == 'stopped':
            self.reportStatus(pkg_id,
                              pkg_conf_path,
                              inst_info,
                              status=start_status)
            mLog.info('package status is stopped: %s %s' %
                      (pkg_info['installPath'], round(time.time() - st, 4)))
            return 0, 'ok'

        # 获得文件锁,准备检查。且快速失败,因为进程检查本身就是1分钟执行一次的
        ret = self.pkgCom.getLock(pkg_conf_path, timeout=10)
        if not ret:
            mLog.error("get lock error")
            return 2008, "get lock error"

        # 根据包配置信息检查包的进程状态
        conf_file = os.path.join(pkg_conf_path, 'package.conf.yaml')
        err_proc, ok_proc = self.pkgCom.checkProcStatus(
            conf_file, install_path=inst_info.get('installPath'))
        err_port, ok_port = self.pkgCom.checkPort(
            conf_file, install_path=inst_info.get('installPath'))
        proc_config = common.getConfig(conf_file, "proc_guard")
        port_config = common.getConfig(conf_file, "port_guard")
        err_info = {
            'err_proc': err_proc,
            'ok_proc': ok_proc,
            'err_port': err_port,
            'ok_port': ok_port
        }
        self.reportStatus(pkg_id,
                          pkg_conf_path,
                          inst_info,
                          status=start_status,
                          err_info=err_info)

        code = 0
        msg = 'ok'

        err_msg = ""
        if err_proc:
            err_msg += ",error process:" + ",".join(err_proc)
        if err_port:
            err_msg += ",error port:" + ",".join(map(str, err_port))

        # 包操作对象
        op = pkgOp(self.APP_BASE, inst_info['installPath'])
        if (err_proc and proc_config
                == 'stopStart') or (err_port and port_config == 'stopStart'):
            msg = "process error,monitor run stopStart:" + err_msg
            mLog.info(msg)
            code, msg = op.stopStart(inst_info['packageId'],
                                     inst_info['installPath'])
        elif (err_proc
              and proc_config == 'custom') or (err_port
                                               and port_config == 'custom'):
            msg = "process error,monitor run custom script:" + err_msg
            mLog.info(msg)
            code, msg = op.resolve(inst_info['packageId'],
                                   inst_info['installPath'])
        elif err_proc or err_port:
            msg = "process error,do nothing:" + err_msg
            mLog.info(msg)
        # 解锁
        self.pkgCom.unLock(pkg_conf_path)
        mLog.info('check process end: %s %s' %
                  (pkg_info['installPath'], round(time.time() - st, 4)))
        return code, msg
Ejemplo n.º 2
0
class pkgOp:
    def __init__(self, app_base="", install_path="", debug=False):
        global CURR_FOLDER
        self.curr_folder = CURR_FOLDER
        self.APP_BASE = app_base
        self.CONF_BASE = _CONF_BASE
        self.pkgCom = PkgCommon()
        if not install_path:
            self.install_path = os.getcwd()
        else:
            self.install_path = install_path

        pkg_info = self.pkgCom.getPkgId(self.install_path)
        if pkg_info :
            self.pkg_id = pkg_info['packageId']
            self.pkg_conf_path = pkg_info['confPath']
            log_level = 'DEBUG' if debug else 'INFO'
            self.opLog = self.pkgCom.configLog(pkg_info['installPath'],'op', log_level)
        else:
            self.pkg_id = None
            self.pkg_conf_path = None
        self.fp = {}
    def getUser(self,conf_file):
        cur_user = pwd.getpwuid(os.getuid()).pw_name
        cur_group = grp.getgrgid(os.getgid()).gr_name
        conf_dict = yaml.load(file(conf_file, 'r'))
        if not conf_dict.get('user'):
            user, group = cur_user, cur_group
        else:
            # 兼容root:root或者root.root的两种方式
            tmp = [s.strip() for s in re.split('\.|:', conf_dict['user'])]
            if len(tmp) == 1: # 如果只填了用户
                user, group = tmp[0], tmp[0]
            else:
                user, group = tmp[0:2]
        return user, group

    @catch_except
    def start(self, pkg_id, install_path, update_status=True):
        conf_file = self.getConfigFile(pkg_id, install_path)
        user, group = self.getUser(conf_file)
        # 修改文件属主
        common.chown_by_name(install_path, user, group)

        # 添加crontab
        self.opLog.info("start to add crontab")
        ret = self.pkgCom.getLock(os.path.dirname(self.CONF_BASE), filename="crontab.lock")
        if not ret:
            self.exit_proc(2005, "get lock error, please try again")
        shell = 'export VISUAL="%s/crontab.py add %s";echo "no"|crontab -e'%(self.curr_folder, install_path)
        code,msg = common.runShell(shell,user=user)
        if code != 0:
            self.exit_proc(2010, "add crontab error,code:%s,msg:%s"%(code,msg))
        ret = self.pkgCom.unLock(os.path.dirname(self.CONF_BASE), filename ="crontab.lock")
        if not ret:
            self.exit_proc(2009, "unlock error")

        # 执行启动脚本
        self.opLog.info("start to start")
        code, start_msg = common.runConfig(conf_file, "start_script", install_path)
        if code != 0:
            msg = '执行启动脚本失败, code=%s,msg=%s' % (code,start_msg)
            self.opLog.info(msg)
            return code, msg
        msg = start_msg

        self.opLog.info("start end, start to check process")
        # 检查启动结果
        err_app, ok_app, err_port, ok_port = self.pkgCom.checkStart(conf_file, install_path)

        # 更新包状态
        if update_status:
            self.opLog.info("check end,update status")
            status = {
                'status': 'started',
                'time': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                'successProcess': ok_app,
                'errorProcess': err_app,
                'successPort': ok_port,
                'errorPort': err_port
            }
            self.updateStatus(status)
        err_msg = ""
        if err_app or err_port:
            if err_app:
                err_msg += ",异常进程:" + ";".join(err_app)
            if err_port:
                err_msg += ",异常端口:" + ";".join(map(str, err_port))

            msg = '启动失败 %s,启动返回信息:%s' % (err_msg,start_msg)
            self.opLog.info(msg)
            code = 1009
            return code, msg
        self.opLog.info("start successfully ")
        return code, msg

    # 单独配置的启动脚本
    def restart(self, pkg_id, install_path):
        err = ""
        code, msg = self.stop(pkg_id, install_path, update_status=False)
        if code != 0:
            err = msg
        code, msg = self.start(pkg_id, install_path, update_status=True) # 考虑到本身就是stoped状态
        if code != 0:
            err += msg
        if err != "":
            code = 1009
            msg = err
            return code, msg
        return 0, "ok"

    # 通过先stop,再start来实现重启
    def stopStart(self, pkg_id, install_path):
        err = ""
        code, msg = self.stop(pkg_id, install_path)
        if code != 0:
            err = msg
        code, msg = self.start(pkg_id, install_path)
        if code != 0:
            err += msg
        if err != "":
            code = 1009
            msg = err
            return code, msg
        return 0, "ok"

    # resolve
    def resolve(self, pkg_id, install_path):
        self.opLog.info("run resolve")
        conf_file = self.getConfigFile(pkg_id, install_path)
        code, msg = common.runConfig(conf_file, "resolve_script", install_path)
        self.opLog.info("resolve end,code:%s,msg:%s" % (code, msg))
        return code, msg
        
    @catch_except
    def stop(self, pkg_id, install_path, update_status=True):
        conf_file = self.getConfigFile(pkg_id, install_path)
        user, group = self.getUser(conf_file)
        # 修改文件属主
        common.chown_by_name(install_path, user, group)

        #删除crontab
        self.opLog.info("start to clear crontab")
        ret = self.pkgCom.getLock(os.path.dirname(self.CONF_BASE), filename="crontab.lock")
        if not ret:
            self.exit_proc(2005, "get lock error, please try again")
        shell = 'export VISUAL="%s/crontab.py del %s";crontab -e'%(self.curr_folder, install_path)
        code,msg = common.runShell(shell,user=user)
        if code != 0:
            self.exit_proc(2010, "del crontab error,code:%s,msg:%s"%(code,msg))
        ret = self.pkgCom.unLock(os.path.dirname(self.CONF_BASE), filename="crontab.lock")
        if not ret:
            self.exit_proc(2009, "unlock error")

        self.opLog.info("start to stop")
        code, msg = common.runConfig(conf_file, "stop_script", install_path)
        if code != 0:
            msg = '执行停止脚本失败,code=%s,msg=%s' % (code,msg)
            self.opLog.info(msg)
            return code, msg
        self.opLog.info("stop end, start to check process")
        err_app, ok_app, ok_port, err_port = self.pkgCom.checkStop(conf_file, install_path)
        # 如果是restart的话是不更新状态的
        if update_status:
            self.opLog.info("check end,update status")
            status = {
                'status': 'stopped',
                'time': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                'successProcess': ok_app,
                'errorProcess': err_app,
                'successPort': ok_port,
                'errorPort': err_port
            }
            self.updateStatus(status)
        err_msg = ""
        code = 0
        if err_app or err_port:
            if err_app:
                err_msg += ",error process:" + ";".join(err_app)
            if err_port:
                err_msg += ",error port:" + ";".join(map(str, err_port))

            msg = 'stop failed %s' % (err_msg)
            code = 1010
            self.opLog.info(msg)
            return code, msg
        self.opLog.info("stop successfully")
        return code, msg

    def uninstall(self, pkg_id, install_path):
        code, msg = self.stop(pkg_id, install_path)
        back_path = "/tmp/" + pkg_id
        conf_bak_base = os.path.join(os.path.dirname(self.APP_BASE), 'confbak')
        if not os.path.exists(conf_bak_base):
            try:
                os.makedirs(conf_bak_base, 0o755)
            except OSError, exc:  # Python >2.5 (except OSError, exc: for Python <2.5)
                if exc.errno == errno.EEXIST and os.path.isdir(conf_bak_base):
                    pass
                else:
                    msg = " create bak path error"
                    code = 1001
                    print msg

        if code != 0:
            print msg
            # return code,msg
        code = 0

        # backup package
        if os.path.exists(install_path):
            back_dir = os.path.join(back_path, os.path.basename(install_path),
                                    time.strftime("%Y%m%d%H%M%S", time.localtime()))
            shutil.move(install_path, back_dir)

        # backup pakcage configfile
        conf_file = self.getConfigFile(pkg_id, install_path)
        conf_path = os.path.dirname(conf_file)
        if os.path.exists(conf_path):
            conf_bak_path = os.path.join(conf_bak_base, pkg_id)
            if not os.path.exists(conf_bak_path):
                os.makedirs(conf_bak_path, 0o755)
            shutil.move(conf_path, os.path.join(conf_bak_path, os.path.basename(conf_path) + "_" + str(time.time())))

        # backup config package configfile ,if config package exists
        pkg_info = self.pkgCom.getPkgId(install_path, 'conf')
        if pkg_info:
            conf_pkg_id = pkg_info['packageId']
            path = pkg_info['confPath']
        else:
            conf_pkg_id = None
            path = None
        if conf_pkg_id:
            conf_file = self.getConfigFile(conf_pkg_id, install_path)
            conf_path = os.path.dirname(conf_file)
            if os.path.exists(conf_path):
                conf_bak_path = os.path.join(conf_bak_base, conf_pkg_id)
                if not os.path.exists(conf_bak_path):
                    os.makedirs(conf_bak_path, 0o755)
                shutil.move(conf_path,
                            os.path.join(conf_bak_path, os.path.basename(conf_path) + "_" + str(time.time())))
        return code, msg