def need_to_restart():
    """Restart auto-restart processes that appear to have died.

    Two groups of records with ``auto_restart == 1`` are handled:

    1. RUNNING records whose ``timeout_timestamp`` is already in the past,
       i.e. the process has not touched the DB in time.
    2. Records that are currently in EXITED status.

    For every candidate a conditional UPDATE is executed first, and
    ``restart_process`` is only called when that UPDATE reports exactly one
    affected row.

    :return: None
    """
    # Group 1: RUNNING, but the heartbeat deadline has passed.
    timed_out = Process.select().where(
        Process.status == STATUS.RUNNING,
        Process.auto_restart == 1,
        Process.timeout_timestamp < int(time.time()))
    for candidate in timed_out:
        # Re-check the same conditions inside the UPDATE so the row is only
        # flipped to EXITED if it still matches at write time.
        claim = Process.update(status=STATUS.EXITED).where(
            Process.id == candidate.id,
            Process.status == STATUS.RUNNING,
            Process.auto_restart == 1,
            Process.timeout_timestamp < int(time.time()))
        if claim.execute() != 1:
            continue
        logger.info("重启进程%s" % candidate.id)
        restart_process(program_id=candidate.id)

    # Group 2: already marked EXITED.
    exited = Process.select().where(
        Process.status == STATUS.EXITED,
        Process.auto_restart == 1)
    for candidate in exited:
        # NOTE(review): this UPDATE writes EXITED onto a row that is already
        # EXITED; whether that counts as one affected row presumably depends
        # on the database backend/driver settings -- confirm.
        claim = Process.update(status=STATUS.EXITED).where(
            Process.id == candidate.id,
            Process.status == STATUS.EXITED,
            Process.auto_restart == 1)
        if claim.execute() != 1:
            continue
        logger.info("重启进程%s" % candidate.id)
        restart_process(program_id=candidate.id)
def touch_db(program_id, touch_timeout):
    """Push the heartbeat deadline of a program forward.

    The new deadline is ``int(time.time() + touch_timeout)`` and is written
    to ``timeout_timestamp`` only while the record is in RUNNING status.

    :param program_id: ID of the program record to touch
    :param touch_timeout: offset added to the current time to form the new
        deadline
    :return: True when the deadline was updated, or when the program is
        STARTING/STOPPING and no update is needed; False when the record is
        missing, the update raised, or no row was updated
    """
    logger.info("进程%s运行中..." % program_id)
    timeout_timestamp = int(time.time() + touch_timeout)

    try:
        process = ProcessModel.select().where(
            ProcessModel.id == program_id).get()
    except DoesNotExist:
        logger.warning("touch_db失败,没有这条记录,%s可能已停止" % program_id)
        return False

    # A program that is STARTING or STOPPING does not need its deadline
    # refreshed.
    if process.status in (STATUS.STARTING, STATUS.STOPPING):
        return True

    # Any failure of the update is logged and reported as False rather than
    # propagated to the caller.
    # noinspection PyBroadException
    try:
        ret = ProcessModel.update(timeout_timestamp=timeout_timestamp).where(
            ProcessModel.id == program_id,
            ProcessModel.status == STATUS.RUNNING).execute()
    except Exception:
        logger.exception("touch_db异常")
        return False

    if ret != 0:
        return True

    # Zero rows updated: the record is gone or is no longer RUNNING.
    logger.warning("touch_db失败,没有这条记录,%s可能已停止" % program_id)
    return False
def need_to_reset_status():
    """Roll back records stuck in a transitional status.

    A record counts as stuck when ``update_time + 60`` is smaller than the
    current ``int(time.time())``. Stuck STOPPING records are set back to
    RUNNING and stuck STARTING records are set to STOPPED. The number of
    reset records is logged whenever it is non-zero.

    :return: None
    """
    # (status the record is stuck in, status to fall back to, log message)
    transitions = (
        (STATUS.STOPPING, STATUS.RUNNING, "重置了%s个进程的状态为RUNNING"),
        (STATUS.STARTING, STATUS.STOPPED, "重置了%s个进程的状态为STOPPED"),
    )
    for stuck_status, fallback_status, message in transitions:
        reset = Process.update(status=fallback_status).where(
            Process.status == stuck_status,
            Process.update_time + 60 < int(time.time()))
        cnt = reset.execute()
        if cnt:
            logger.info(message % cnt)
def get_program(program_id=None, program_name=None, status=None):
    """Fetch a single program record.

    At least one of ``program_id`` and ``program_name`` must be supplied.
    Every supplied argument becomes an equality filter, and all filters are
    passed together to a single ``where`` call.

    :param program_id: program ID
    :param program_name: program name
    :param status: optional program status to filter on as well
    :return: the matching program object
    :raises exceptions.ParamValueException: neither ``program_id`` nor
        ``program_name`` was given
    :raises exceptions.ProgramNotExistInDB: no record matches the filters
    :raises exceptions.MySQLDBException: the query raised ``DatabaseError``
    """
    # The guard must reject the "neither given" case; the previous check
    # rejected the "both given" case instead, contradicting its own message.
    if program_id is None and program_name is None:
        raise exceptions.ParamValueException("program_id和program_name至少需要一个")

    conditions = []
    if program_id is not None:
        conditions.append(Process.id == program_id)
    if program_name is not None:
        conditions.append(Process.name == program_name)
    if status is not None:
        conditions.append(Process.status == status)

    try:
        program = Process.select().where(*conditions).get()
    except DoesNotExist:
        raise exceptions.ProgramNotExistInDB()
    except DatabaseError:
        # Log whichever identifier the caller actually searched by.
        identifier = program_id if program_id is not None else program_name
        logger.exception("查询程序%s时,数据库发生异常" % identifier)
        raise exceptions.MySQLDBException("查询程序时,数据库发生异常")
    return program
def local_stop(program_id, wait_timeout=10):
    """Stop the local process that belongs to a program record.

    The pid is obtained from ``get_pid(program_id)``; the external ``kill``
    command is run with that pid (no explicit signal option), and then
    ``wait_until_stop_done`` is asked whether the process stopped.

    :param program_id: ID of the program to stop
    :param wait_timeout: value passed to ``wait_until_stop_done``
    :return: True when the process stopped, or when ``get_pid`` returned 0
        and there was nothing to stop
    :raises exceptions.NoConfigException: no record exists for ``program_id``
    :raises exceptions.AlreadyStopException: the record is already STOPPED
    :raises exceptions.StopException: ``wait_until_stop_done`` returned a
        falsy value
    """
    try:
        process = Process.select().where(Process.id == program_id).get()
    except DoesNotExist:
        # This is the stop path, so the message must say "cannot stop"
        # (it previously said "cannot start").
        msg = "找不到进程%s的配置数据,无法停止" % program_id
        logger.warning(msg)
        raise exceptions.NoConfigException(msg)

    if process.status == STATUS.STOPPED:
        msg = "进程%s已停止,忽略本次请求" % process.name
        logger.warning(msg)
        raise exceptions.AlreadyStopException()

    pid = get_pid(program_id)
    if pid == 0:
        # A pid of 0 is treated as "no such process"; nothing to kill.
        logger.warning("要停止的进程不存在,忽略")
        return True

    args = ['kill', str(pid)]
    # The exit status of kill is not inspected; success is decided by
    # wait_until_stop_done below.
    subprocess.Popen(args).wait()

    if not wait_until_stop_done(program_id, wait_timeout):
        logger.warning("进程%s停止失败" % process.name)
        raise exceptions.StopException()
    return True
def get_programs(status=None):
    """Query program records in bulk.

    :param status: optional program status; when given, only records with
        this status are selected, otherwise all records are selected
    :return: the select query over the matching program records
    :raises exceptions.MySQLDBException: building the query raised
        ``DatabaseError``
    """
    # NOTE(review): the query object is returned without being iterated
    # here; if the ORM only contacts the database on iteration, a
    # DatabaseError would surface in the caller instead -- confirm.
    try:
        query = Process.select()
        if status is not None:
            query = query.where(Process.status == status)
        return query
    except DatabaseError:
        logger.exception("查询程序列表时,数据库发生异常")
        raise exceptions.MySQLDBException("查询程序列表时,数据库发生异常")
def update_program(program_id, **fields):
    """Update one program record.

    The record is looked up first so that a missing ID is reported as an
    error; the given fields are then written with a single UPDATE.

    :param program_id: ID of the program record to update
    :param fields: column names and new values, forwarded to
        ``Process.update``
    :return: True once the UPDATE has been executed
    :raises exceptions.NoConfigException: no record exists for
        ``program_id``
    """
    existing = Process.select().where(Process.id == program_id)
    try:
        existing.get()
    except DoesNotExist:
        msg = "程序id=%s的配置不存在" % program_id
        logger.error(msg)
        raise exceptions.NoConfigException(msg)

    change = Process.update(**fields).where(Process.id == program_id)
    change.execute()
    return True
def get_status(program_id):
    """Report the status of a program.

    :param program_id: ID of the program to check
    :return: False when no record exists for ``program_id`` or the record
        is STOPPED; otherwise whatever ``check_process(program_id)`` returns
    """
    try:
        record = Process.select().where(Process.id == program_id).get()
    except DoesNotExist:
        return False

    # Only consult check_process for records that are not marked STOPPED.
    if record.status != STATUS.STOPPED:
        return check_process(program_id)
    return False
def change_status(program_id, from_status, to_status):
    """Change the status of a program record.

    The change is applied only when the current status is one of
    ``from_status``. This is checked once on the loaded record and again
    inside the UPDATE's WHERE clause. ``update_time`` is set to
    ``tools.get_now_time()`` together with the new status.

    :param program_id: program ID
    :param from_status: allowed current statuses; a single int is wrapped
        into a one-element tuple
    :param to_status: status to set
    :return: True when the status was changed; False when the record does
        not exist, its status is not in ``from_status``, a ``DatabaseError``
        occurred, or the UPDATE affected no rows
    """
    if isinstance(from_status, int):
        from_status = (from_status, )
    fields = {"status": to_status, "update_time": tools.get_now_time()}

    try:
        current = Process.select().where(Process.id == program_id).get()
    except DoesNotExist:
        logger.error("程序%s不存在" % program_id)
        return False
    except DatabaseError:
        logger.exception("查询进程%s时,数据库发生异常" % program_id)
        return False

    if current.status not in from_status:
        logger.error("程序%s的状态%s不是%s" % (program_id, current.status, from_status))
        return False

    try:
        # The status condition is repeated in the WHERE clause, so the
        # UPDATE matches nothing if the status changed after the read above.
        transition = Process.update(fields).where(
            Process.id == program_id,
            Process.status << from_status)
        ret_code = transition.execute()
    except DatabaseError:
        logger.exception("程序%s的状态%s=>%s更新时,数据库发生异常" % (program_id, from_status, to_status))
        return False

    if ret_code != 0:
        return True

    logger.warning("程序%s的状态%s=>%s更新失败,ID或状态不匹配" % (program_id, from_status, to_status))
    return False
def local_start(program_id, wait=3):
    """Start the local process that belongs to a program record.

    The record's command and launch settings are JSON-encoded and handed to
    the external ``dswrapper`` command as two arguments. The start counts
    as successful when ``dswrapper`` exits with code 0 and
    ``check_start_status(program_id, wait)`` returns a truthy value.

    :param program_id: ID of the program to start
    :param wait: value passed to ``check_start_status``
    :return: True when the program was started
    :raises exceptions.NoConfigException: no record exists for
        ``program_id``
    :raises exceptions.AlreadyStartException: the record is already RUNNING
    :raises exceptions.StartException: ``dswrapper`` exited non-zero or
        ``check_start_status`` returned a falsy value
    """
    try:
        process = Process.select().where(Process.id == program_id).get()
    except DoesNotExist:
        msg = "找不到进程%s的配置数据,无法启动" % program_id
        logger.warning(msg)
        raise exceptions.NoConfigException(msg)

    if process.status == STATUS.RUNNING:
        msg = "进程%s已启动,忽略本次请求" % process.name
        logger.warning(msg)
        raise exceptions.AlreadyStartException()

    # First wrapper argument: the command; second: the launch settings.
    command_json = json.dumps(process.command)
    launch_json = json.dumps({
        'program_id': process.id,
        'program_name': process.name,
        'directory': process.directory,
        'environment': process.environment,
        'touch_timeout': process.touch_timeout,
        'stdout_logfile': process.stdout_logfile,
        'stderr_logfile': process.stderr_logfile,
    })

    # NOTE(review): stdout is piped but never read before wait(); the
    # subprocess docs warn this can block if the child writes enough
    # output to fill the pipe -- confirm dswrapper stays quiet.
    wrapper = subprocess.Popen(['dswrapper', command_json, launch_json],
                               stdout=subprocess.PIPE)
    wrapper.wait()

    # check_start_status is only consulted when the wrapper exited cleanly.
    if wrapper.returncode != 0 or not check_start_status(program_id, wait):
        logger.warning("进程%s启动失败" % process.name)
        raise exceptions.StartException()
    return True
def create_program(program_id, program_name, command, machines, directory,
                   environment, auto_start, auto_restart, touch_timeout,
                   max_fail_count, stdout_logfile, stderr_logfile):
    """Add one program record.

    Besides the supplied values, the new record starts with ``machine``
    set to an empty string, ``status`` STOPPED, ``fail_count`` 0 and
    ``timeout_timestamp`` 0x7FFFFFFF.

    :param program_id: UUID stored as the record's ``id``
    :param program_name: stored as ``name``
    :param command: stored as ``command``
    :param machines: stored as ``machines``
    :param directory: stored as ``directory``
    :param environment: stored as ``environment``
    :param auto_start: stored as ``auto_start``
    :param auto_restart: stored as ``auto_restart``
    :param touch_timeout: stored as ``touch_timeout``
    :param max_fail_count: stored as ``max_fail_count``
    :param stdout_logfile: stored as ``stdout_logfile``
    :param stderr_logfile: stored as ``stderr_logfile``
    :return: the created program object
    :raises exceptions.MySQLDBException: the insert raised ``DatabaseError``
    """
    fields = dict(
        id=program_id,
        name=program_name,
        command=command,
        machines=machines,
        directory=directory,
        environment=environment,
        auto_start=auto_start,
        auto_restart=auto_restart,
        touch_timeout=touch_timeout,
        max_fail_count=max_fail_count,
        stdout_logfile=stdout_logfile,
        stderr_logfile=stderr_logfile,
        machine="",
        status=STATUS.STOPPED,
        fail_count=0,
        timeout_timestamp=0x7FFFFFFF,
    )
    try:
        # create() already persists the new row, so no follow-up save() is
        # needed; the extra save() only issued a redundant second write.
        return Process.create(**fields)
    except DatabaseError:
        logger.exception("新增程序时,数据库发生异常")
        raise exceptions.MySQLDBException("新增程序时,数据库发生异常")