class Clear():
    """Run the ``clear_file`` rules of one package, or of every package.

    Each rule line in a package's ``package.conf.yaml`` (section
    ``clear_file``) has at least five whitespace-separated fields, read as
    ``path limit cmd param target``.
    """

    # A line whose first non-blank character is '#'.
    _COMMENT_LINE_RE = re.compile(r"\s*#")
    # Captures everything before a trailing '#' comment; a '#' inside a
    # single- or double-quoted string does not start a comment.
    _INLINE_COMMENT_RE = re.compile(
        r"^((\"[^\"]*\"|'[^']*'|[^'\"#])*)(#.*)$")
    # Seconds per unit for the age suffixes understood by delete_file().
    _AGE_UNIT_SECONDS = {'h': 3600, 'm': 60, 'd': 24 * 3600}

    def __init__(self, app_base, install_path=None, debug=False):
        """Store the target and pick the log level.

        :param app_base: stored as ``APP_BASE``.
        :param install_path: install path of one package, or ``'all'``.
        :param debug: log at DEBUG instead of INFO when true.
        """
        self.APP_BASE = app_base
        self.CONF_BASE = _CONF_BASE
        self.install_path = install_path
        self.pkgCom = PkgCommon()
        self.log_level = 'DEBUG' if debug else 'INFO'

    def run(self):
        """Spawn one greenlet per package and wait for all of them.

        :return: ``(0, "ok")``, or ``(1, message)`` when ``install_path``
            does not resolve to a package.
        """
        if self.install_path == 'all':
            info_list = self.pkgCom.getPkgList()
        else:
            pkg_info = self.pkgCom.getPkgId(self.install_path)
            if not pkg_info:
                return 1, "%s not a valid package" % (self.install_path)
            info_list = {'pkg': [pkg_info]}

        # Only packages of type 'pkg' are processed.
        pkg_info_list = info_list['pkg'] if 'pkg' in info_list else []
        t_list = [gevent.spawn(self.clear_pkg, info)
                  for info in pkg_info_list]
        gevent.joinall(t_list)
        return 0, "ok"

    @catch_except
    def clear_pkg(self, pkg_info):
        """Apply every clear rule of a single package.

        :param pkg_info: mapping with at least ``confPath`` and
            ``installPath``.
        :return: ``(0, 'ok')``.
        """
        # Random 0-15 s delay before starting
        # (presumably to stagger the greenlets -- confirm).
        gevent.sleep(random.randint(0, 15))
        pkg_conf_path = pkg_info['confPath']
        install_path = pkg_info['installPath']

        st = time.time()
        cLog = self.pkgCom.configLog(install_path, 'clear', self.log_level)
        cLog.info('clear file start: %s' % install_path)

        conf_file = os.path.join(pkg_conf_path, "package.conf.yaml")
        for config in self.get_config(conf_file):
            path, limit, cmd, param, target = config[:5]
            # Relative rule paths are resolved against the install path.
            if not os.path.isabs(path):
                path = os.path.join(install_path, path)

            if cmd == "delete":
                self.delete_file(path=path, target=target, param=param,
                                 limit=limit, log=cLog)
            elif cmd == "clear":
                self.clear_file(path=path, target=target, param=param,
                                limit=limit, log=cLog)
            elif cmd == "tar":
                # TODO: not implemented yet.
                self.tar_file(path=path, target=target, param=param,
                              limit=limit, log=cLog)
            # Any other command is ignored.

        cLog.info('clear file end: %s %s' %
                  (install_path, round(time.time() - st, 4)))
        return 0, 'ok'

    def get_config(self, config_file):
        """Parse the ``clear_file`` section into lists of fields.

        Comment-only lines and trailing ``#`` comments are dropped, and
        lines with fewer than five fields are skipped.

        :param config_file: path handed to ``common.getConfig``.
        :return: list of field lists, one per usable rule line.
        """
        clear_conf = common.getConfig(config_file, 'clear_file')
        if not clear_conf:
            # Nothing configured (assumes getConfig yields a falsy value
            # when the section is missing -- confirm).
            return []

        real_conf = []
        for line in clear_conf.splitlines():
            if self._COMMENT_LINE_RE.match(line):
                continue
            inline = self._INLINE_COMMENT_RE.match(line)
            conf_line = inline.group(1) if inline is not None else line
            # str.split() ignores leading/trailing whitespace, so a line
            # such as "a b c d # note" yields four fields and is skipped,
            # instead of five with an empty target that matches every file.
            conf = conf_line.split()
            if len(conf) < 5:
                continue
            real_conf.append(conf)
        return real_conf

    def delete_file(self, path, target, param, limit, log):
        """Delete files under ``path`` that are old enough and match ``target``.

        :param path: directory tree to walk.
        :param target: file-name pattern; each ``*`` becomes ``.*`` and the
            result is matched with ``re.match`` (anchored at the start only,
            other regex metacharacters are not escaped).
        :param param: minimum age: ``<n>h`` hours, ``<n>m`` minutes,
            ``<n>d`` days, or a bare number meaning days.
        :param limit: passed to ``self.check_limit``.
        :param log: logger used for progress messages.
        :return: the ``check_limit`` result when its code is 0, otherwise
            ``(0, 'ok')``.
        """
        code, msg = self.check_limit(path, limit)
        # NOTE(review): check_limit is not visible here; a code of 0 makes
        # this method return without deleting anything.
        if code == 0:
            return code, msg

        target_reg = target.replace('*', '.*')

        unit = param[-1:]
        if unit in self._AGE_UNIT_SECONDS:
            age = int(param.strip(unit)) * self._AGE_UNIT_SECONDS[unit]
        else:
            age = int(param) * 24 * 3600
        limit_mtime = time.time() - age

        for root, dirs, files in os.walk(path):
            for name in files:
                filepath = os.path.join(root, name)
                try:
                    mtime = os.stat(filepath).st_mtime
                except OSError:
                    # The file vanished or cannot be stat'ed; skip it.
                    continue
                if mtime >= limit_mtime or not re.match(target_reg, name):
                    continue

                log.info("begin delete, file:[%s]" % (filepath))
                try:
                    os.remove(filepath)
                except Exception as e:
                    # Best effort: keep going, but record why it failed.
                    log.info("delete failed, file:[%s], error:[%s]" %
                             (filepath, e))
                else:
                    log.info("delete success, file:[%s]" % (filepath))
        return 0, 'ok'
class Monitor():
    """Check process/port health of one package, or of every package.

    For each package the result is reported and, depending on the
    ``proc_guard`` / ``port_guard`` settings, a stop-start or a custom
    resolve script is triggered.
    """

    def __init__(self, app_base, install_path=None, debug=False):
        """Store the target and pick the log level.

        :param app_base: stored as ``APP_BASE`` and passed on to ``pkgOp``.
        :param install_path: install path of one package, or ``'all'``.
        :param debug: log at DEBUG instead of INFO when true.
        """
        self.APP_BASE = app_base
        self.CONF_BASE = _CONF_BASE
        self.install_path = install_path
        self.pkgCom = PkgCommon()
        self.log_level = 'DEBUG' if debug else 'INFO'

    def run(self):
        """Spawn one greenlet per package and wait for all of them.

        :return: ``(0, "ok")``, or ``(1, message)`` when ``install_path``
            does not resolve to a package.
        """
        if self.install_path == 'all':
            info_list = self.pkgCom.getPkgList()
        else:
            pkg_info = self.pkgCom.getPkgId(self.install_path)
            if not pkg_info:
                return 1, "%s not a valid package" % (self.install_path)
            info_list = {'pkg': [pkg_info]}

        # Only packages of type 'pkg' are processed.
        pkg_info_list = info_list['pkg'] if 'pkg' in info_list else []
        t_list = [gevent.spawn(self.monitorPkg, info)
                  for info in pkg_info_list]
        gevent.joinall(t_list)
        return 0, "ok"

    def _formatProcs(self, proc_names, proc_list, proc_num_list):
        """Join ``name:min,max|current`` entries for ``proc_names`` with '##'."""
        entries = []
        for proc_name in proc_names:
            proc_conf = proc_list[proc_name]
            entries.append("%s:%s,%s|%s" % (
                proc_name,
                proc_conf.get('proc_num_min', 0),
                proc_conf.get('proc_num_max', 0),
                proc_num_list[proc_name]))
        return '##'.join(entries)

    def reportStatus(self, pkg_id, pkg_conf_path, inst_info, status,
                     err_info=None):
        """Build the status record of a package and hand it to ``report``.

        :param pkg_id: package id placed in the report dimensions.
        :param pkg_conf_path: directory containing ``package.conf.yaml``.
        :param inst_info: mapping providing ``installPath``.
        :param status: package status value to report.
        :param err_info: optional mapping with ``ok_proc``, ``err_proc``,
            ``ok_port`` and ``err_port`` lists; missing keys count as empty.
        """
        if err_info is None:
            err_info = {}

        dims = {
            'process.process.package_id': pkg_id,
            'process.process.version_id': "",
            'process.process.install_path': inst_info['installPath'],
        }
        vals = {'process.process.package_status': status}

        # Alert as soon as any process or port is reported abnormal.
        if not err_info.get('err_proc') and not err_info.get('err_port'):
            vals['process.process.alert_status'] = 0
        else:
            vals['process.process.alert_status'] = 1

        conf_file = os.path.join(pkg_conf_path, 'package.conf.yaml')
        # Index the proc_list entries by process name; a falsy config
        # value is treated as "no processes configured".
        proc_list = {}
        for proc in common.getConfig(conf_file, "proc_list") or []:
            proc_list[proc['proc_name']] = proc
        proc_num_list = self.pkgCom.getProcNum()

        vals['process.process.normal_processes'] = self._formatProcs(
            err_info.get('ok_proc', []), proc_list, proc_num_list)
        vals['process.process.abnormal_processes'] = self._formatProcs(
            err_info.get('err_proc', []), proc_list, proc_num_list)
        vals['process.process.normal_ports'] = "##".join(
            map(str, err_info.get('ok_port', [])))
        vals['process.process.abnormal_ports'] = "##".join(
            map(str, err_info.get('err_port', [])))

        # `report` is a file-level name; it is only called when truthy.
        if report:
            report(data_id=3000, dims=dims, vals=vals)

    @catch_except  # make sure packages do not affect each other
    def monitorPkg(self, pkg_info):
        """Check one package and trigger the configured recovery action.

        :param pkg_info: mapping with ``packageId``, ``confPath`` and
            ``installPath``.
        :return: ``(code, msg)``: ``(0, 'ok')`` when there is nothing to do
            or no recovery action ran, ``(2008, ...)`` when the lock cannot
            be acquired, otherwise the result of the recovery action.
        """
        import random
        # Random 0-15 s delay before starting
        # (presumably to stagger the greenlets -- confirm).
        gevent.sleep(random.randint(0, 15))
        pkg_id = pkg_info['packageId']
        pkg_conf_path = pkg_info['confPath']
        pkg_install_path = pkg_info['installPath']

        # Set up logging.
        st = time.time()
        mLog = self.pkgCom.configLog(pkg_install_path, 'monitor',
                                     self.log_level)
        mLog.info('check process start: %s' % pkg_install_path)

        # Read the instance information (last entry of the YAML list).
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary objects; consider yaml.safe_load if these files may
        # ever be written by an untrusted party.
        inst_conf_file = os.path.join(pkg_conf_path, 'instance.conf.yaml')
        with open(inst_conf_file, 'r') as fp:
            conf_info = yaml.load(fp)
        inst_info = conf_info.pop()

        # Check whether the package has been started at all.
        status_conf_file = os.path.join(pkg_conf_path, 'package.status')
        if not os.path.isfile(status_conf_file):
            return 0, 'ok'
        with open(status_conf_file, 'r') as fp:
            status_info = yaml.load(fp)
        start_status = status_info['status']
        if start_status == 'stopped':
            self.reportStatus(pkg_id, pkg_conf_path, inst_info,
                              status=start_status)
            mLog.info('package status is stopped: %s %s' %
                      (pkg_info['installPath'],
                       round(time.time() - st, 4)))
            return 0, 'ok'

        # Take the file lock before checking. Fail fast: the check itself
        # runs once a minute anyway.
        if not self.pkgCom.getLock(pkg_conf_path, timeout=10):
            mLog.error("get lock error")
            return 2008, "get lock error"

        try:
            # Check process and port state against the package config.
            conf_file = os.path.join(pkg_conf_path, 'package.conf.yaml')
            err_proc, ok_proc = self.pkgCom.checkProcStatus(
                conf_file, install_path=inst_info.get('installPath'))
            err_port, ok_port = self.pkgCom.checkPort(
                conf_file, install_path=inst_info.get('installPath'))
            proc_config = common.getConfig(conf_file, "proc_guard")
            port_config = common.getConfig(conf_file, "port_guard")

            err_info = {
                'err_proc': err_proc,
                'ok_proc': ok_proc,
                'err_port': err_port,
                'ok_port': ok_port
            }
            self.reportStatus(pkg_id, pkg_conf_path, inst_info,
                              status=start_status, err_info=err_info)

            code = 0
            msg = 'ok'
            err_msg = ""
            if err_proc:
                err_msg += ",error process:" + ",".join(err_proc)
            if err_port:
                err_msg += ",error port:" + ",".join(map(str, err_port))

            # Package operation object used for the recovery actions.
            op = pkgOp(self.APP_BASE, inst_info['installPath'])
            if (err_proc and proc_config == 'stopStart') or \
                    (err_port and port_config == 'stopStart'):
                msg = "process error,monitor run stopStart:" + err_msg
                mLog.info(msg)
                code, msg = op.stopStart(inst_info['packageId'],
                                         inst_info['installPath'])
            elif (err_proc and proc_config == 'custom') or \
                    (err_port and port_config == 'custom'):
                msg = "process error,monitor run custom script:" + err_msg
                mLog.info(msg)
                code, msg = op.resolve(inst_info['packageId'],
                                       inst_info['installPath'])
            elif err_proc or err_port:
                msg = "process error,do nothing:" + err_msg
                mLog.info(msg)
        finally:
            # Always release the lock, even when a check or a recovery
            # action raises; otherwise the lock would never be released.
            self.pkgCom.unLock(pkg_conf_path)

        mLog.info('check process end: %s %s' %
                  (pkg_info['installPath'], round(time.time() - st, 4)))
        return code, msg
class pkgOp:
    """Start, stop, restart, resolve and uninstall an installed package."""

    def __init__(self, app_base="", install_path="", debug=False):
        """Resolve the package that lives at ``install_path``.

        :param app_base: stored as ``APP_BASE``.
        :param install_path: package install path; the current working
            directory is used when empty.
        :param debug: log at DEBUG instead of INFO when true.

        ``self.opLog`` is only created when the package lookup succeeds;
        otherwise ``pkg_id`` and ``pkg_conf_path`` are ``None``.
        """
        self.curr_folder = CURR_FOLDER
        self.APP_BASE = app_base
        self.CONF_BASE = _CONF_BASE
        self.pkgCom = PkgCommon()
        self.install_path = install_path if install_path else os.getcwd()

        pkg_info = self.pkgCom.getPkgId(self.install_path)
        if pkg_info:
            self.pkg_id = pkg_info['packageId']
            self.pkg_conf_path = pkg_info['confPath']
            log_level = 'DEBUG' if debug else 'INFO'
            self.opLog = self.pkgCom.configLog(pkg_info['installPath'],
                                               'op', log_level)
        else:
            self.pkg_id = None
            self.pkg_conf_path = None
        self.fp = {}

    def getUser(self, conf_file):
        """Return the ``(user, group)`` the package should run as.

        The ``user`` entry of the YAML config may be ``user``,
        ``user:group`` or ``user.group``. Without it, the user and group
        of the current process are returned.

        :param conf_file: path of the YAML config file.
        :return: ``(user, group)`` tuple of names.
        """
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary objects; consider yaml.safe_load if this file may ever
        # be written by an untrusted party.
        with open(conf_file, 'r') as fp:
            conf_dict = yaml.load(fp) or {}

        conf_user = conf_dict.get('user')
        if not conf_user:
            return (pwd.getpwuid(os.getuid()).pw_name,
                    grp.getgrgid(os.getgid()).gr_name)

        # Accept both "root:root" and "root.root".
        tmp = [s.strip() for s in re.split(r'[.:]', conf_user)]
        if len(tmp) == 1:
            # Only the user was given: use it as the group name too.
            return tmp[0], tmp[0]
        user, group = tmp[0:2]
        return user, group

    def _runCrontabShell(self, shell, user, action):
        """Run a crontab-editing shell command under ``crontab.lock``.

        ``action`` ("add" / "del") only feeds the error message. The lock
        is released even when the command raises or fails.
        """
        lock_dir = os.path.dirname(self.CONF_BASE)
        if not self.pkgCom.getLock(lock_dir, filename="crontab.lock"):
            self.exit_proc(2005, "get lock error, please try again")
        try:
            code, msg = common.runShell(shell, user=user)
        finally:
            unlocked = self.pkgCom.unLock(lock_dir,
                                          filename="crontab.lock")
        if code != 0:
            self.exit_proc(2010, "%s crontab error,code:%s,msg:%s" %
                           (action, code, msg))
        if not unlocked:
            self.exit_proc(2009, "unlock error")

    @catch_except
    def start(self, pkg_id, install_path, update_status=True):
        """Register the crontab entry, run the start script, verify it.

        :param pkg_id: package id used to locate the config file.
        :param install_path: package install path.
        :param update_status: write the resulting status when true.
        :return: ``(code, msg)``: the start script's failure code, 1009
            when processes or ports are not healthy afterwards, otherwise
            the start script's own code and output.
        """
        conf_file = self.getConfigFile(pkg_id, install_path)
        user, group = self.getUser(conf_file)
        # Fix the ownership of the installed files.
        common.chown_by_name(install_path, user, group)

        # Add the crontab entry.
        self.opLog.info("start to add crontab")
        # NOTE(review): install_path is interpolated into a shell command
        # unquoted; confirm it can never contain shell metacharacters.
        shell = 'export VISUAL="%s/crontab.py add %s";echo "no"|crontab -e' % (
            self.curr_folder, install_path)
        self._runCrontabShell(shell, user, "add")

        # Run the start script.
        self.opLog.info("start to start")
        code, start_msg = common.runConfig(conf_file, "start_script",
                                           install_path)
        if code != 0:
            msg = '执行启动脚本失败, code=%s,msg=%s' % (code, start_msg)
            self.opLog.info(msg)
            return code, msg
        msg = start_msg
        self.opLog.info("start end, start to check process")

        # Verify the result of the start.
        err_app, ok_app, err_port, ok_port = self.pkgCom.checkStart(
            conf_file, install_path)

        # Update the package status.
        if update_status:
            self.opLog.info("check end,update status")
            status = {
                'status': 'started',
                'time': time.strftime("%Y-%m-%d %H:%M:%S",
                                      time.localtime()),
                'successProcess': ok_app,
                'errorProcess': err_app,
                'successPort': ok_port,
                'errorPort': err_port
            }
            self.updateStatus(status)

        if err_app or err_port:
            err_msg = ""
            if err_app:
                err_msg += ",异常进程:" + ";".join(err_app)
            if err_port:
                err_msg += ",异常端口:" + ";".join(map(str, err_port))
            msg = '启动失败 %s,启动返回信息:%s' % (err_msg, start_msg)
            self.opLog.info(msg)
            code = 1009
            return code, msg
        self.opLog.info("start successfully ")
        return code, msg

    def _stopThenStart(self, pkg_id, install_path, update_stop_status):
        """Stop then start the package, collecting both error messages."""
        err = ""
        code, msg = self.stop(pkg_id, install_path,
                              update_status=update_stop_status)
        if code != 0:
            err = msg
        # Start is attempted even when stop failed: the package may simply
        # have been stopped already.
        code, msg = self.start(pkg_id, install_path, update_status=True)
        if code != 0:
            err += msg
        if err != "":
            return 1009, err
        return 0, "ok"

    # Separately configured start script.
    def restart(self, pkg_id, install_path):
        """Stop without updating the status, then start.

        :return: ``(0, "ok")`` or ``(1009, combined error messages)``.
        """
        return self._stopThenStart(pkg_id, install_path, False)

    # Restart implemented as stop followed by start.
    def stopStart(self, pkg_id, install_path):
        """Stop (updating the status), then start.

        :return: ``(0, "ok")`` or ``(1009, combined error messages)``.
        """
        return self._stopThenStart(pkg_id, install_path, True)

    # resolve
    def resolve(self, pkg_id, install_path):
        """Run the package's ``resolve_script``.

        :return: the ``(code, msg)`` returned by ``common.runConfig``.
        """
        self.opLog.info("run resolve")
        conf_file = self.getConfigFile(pkg_id, install_path)
        code, msg = common.runConfig(conf_file, "resolve_script",
                                     install_path)
        self.opLog.info("resolve end,code:%s,msg:%s" % (code, msg))
        return code, msg

    @catch_except
    def stop(self, pkg_id, install_path, update_status=True):
        """Remove the crontab entry, run the stop script, verify it.

        :param pkg_id: package id used to locate the config file.
        :param install_path: package install path.
        :param update_status: write the resulting status when true
            (``restart`` passes false).
        :return: ``(code, msg)``: the stop script's failure code, 1010
            when processes or ports are still reported in error, otherwise
            ``(0, <stop script output>)``.
        """
        conf_file = self.getConfigFile(pkg_id, install_path)
        user, group = self.getUser(conf_file)
        # Fix the ownership of the installed files.
        common.chown_by_name(install_path, user, group)

        # Remove the crontab entry.
        self.opLog.info("start to clear crontab")
        # NOTE(review): install_path is interpolated into a shell command
        # unquoted; confirm it can never contain shell metacharacters.
        shell = 'export VISUAL="%s/crontab.py del %s";crontab -e' % (
            self.curr_folder, install_path)
        self._runCrontabShell(shell, user, "del")

        self.opLog.info("start to stop")
        code, msg = common.runConfig(conf_file, "stop_script", install_path)
        if code != 0:
            msg = '执行停止脚本失败,code=%s,msg=%s' % (code, msg)
            self.opLog.info(msg)
            return code, msg
        self.opLog.info("stop end, start to check process")

        # NOTE(review): the port pair is unpacked as (ok_port, err_port)
        # here but as (err_port, ok_port) after checkStart in start();
        # confirm against PkgCommon.checkStop that this is intentional.
        err_app, ok_app, ok_port, err_port = self.pkgCom.checkStop(
            conf_file, install_path)

        # restart() does not update the status.
        if update_status:
            self.opLog.info("check end,update status")
            status = {
                'status': 'stopped',
                'time': time.strftime("%Y-%m-%d %H:%M:%S",
                                      time.localtime()),
                'successProcess': ok_app,
                'errorProcess': err_app,
                'successPort': ok_port,
                'errorPort': err_port
            }
            self.updateStatus(status)

        code = 0
        if err_app or err_port:
            err_msg = ""
            if err_app:
                err_msg += ",error process:" + ";".join(err_app)
            if err_port:
                err_msg += ",error port:" + ";".join(map(str, err_port))
            msg = 'stop failed %s' % (err_msg)
            code = 1010
            self.opLog.info(msg)
            return code, msg
        self.opLog.info("stop successfully")
        return code, msg

    def _makeDirs(self, path):
        """Create ``path`` (mode 0755); an existing directory is fine."""
        try:
            os.makedirs(path, 0o755)
        except OSError as exc:
            if exc.errno != errno.EEXIST or not os.path.isdir(path):
                raise

    def _backupConfDir(self, conf_bak_base, pkg_id, install_path):
        """Move the config directory of ``pkg_id`` below ``conf_bak_base``."""
        conf_file = self.getConfigFile(pkg_id, install_path)
        conf_path = os.path.dirname(conf_file)
        if not os.path.exists(conf_path):
            return
        conf_bak_path = os.path.join(conf_bak_base, pkg_id)
        if not os.path.exists(conf_bak_path):
            os.makedirs(conf_bak_path, 0o755)
        # The timestamp suffix keeps successive backups apart.
        shutil.move(conf_path,
                    os.path.join(conf_bak_path,
                                 os.path.basename(conf_path) + "_" +
                                 str(time.time())))

    def uninstall(self, pkg_id, install_path):
        """Stop the package and move its files and config to backups.

        The install directory goes to
        ``/tmp/<pkg_id>/<basename>/<timestamp>``; config directories go to
        ``<dirname(APP_BASE)>/confbak/<package id>/``.

        :return: ``(0, msg)``. A failing stop or a failure to create the
            backup base is only printed; the code is always reset to 0.
        """
        code, msg = self.stop(pkg_id, install_path)
        back_path = "/tmp/" + pkg_id
        conf_bak_base = os.path.join(os.path.dirname(self.APP_BASE),
                                     'confbak')
        if not os.path.exists(conf_bak_base):
            try:
                os.makedirs(conf_bak_base, 0o755)
            except OSError as exc:
                # Another process creating the directory first is fine.
                if exc.errno == errno.EEXIST and \
                        os.path.isdir(conf_bak_base):
                    pass
                else:
                    msg = " create bak path error"
                    code = 1001
                    print(msg)
        if code != 0:
            print(msg)
            # return code,msg
        # Errors so far are reported but do not abort the uninstall.
        code = 0

        # Back up the package itself.
        if os.path.exists(install_path):
            back_dir = os.path.join(
                back_path, os.path.basename(install_path),
                time.strftime("%Y%m%d%H%M%S", time.localtime()))
            # Create the parent first so the move can be a plain rename
            # when source and destination share a filesystem.
            self._makeDirs(os.path.dirname(back_dir))
            shutil.move(install_path, back_dir)

        # Back up the package config directory.
        self._backupConfDir(conf_bak_base, pkg_id, install_path)

        # Back up the config package's config directory, if one exists.
        pkg_info = self.pkgCom.getPkgId(install_path, 'conf')
        conf_pkg_id = pkg_info['packageId'] if pkg_info else None
        if conf_pkg_id:
            self._backupConfDir(conf_bak_base, conf_pkg_id, install_path)
        return code, msg