Example #1
0
def get_znode_tree_from_qconf(cluster_name, path, nodes, current_id='1', parent_id='0'):
    """Collect the znode subtree below ``path`` from QConf as flat zTree records.

    A record for ``path`` itself is appended to ``nodes`` first, then the
    function recurses into every child key reported by QConf (depth-first,
    in the order the keys are returned).

    Args:
        cluster_name: cluster identifier handed to ``qconf_py.get_batch_keys``
            and, for the fallback, to ``ZookeeperService.get_zoo_client``.
        path: znode path to start from, e.g. ``"/"`` or ``"/app/conf"``.
        nodes: list extended in place with dicts holding the keys ``id``,
            ``pId``, ``name`` and ``path``.
        current_id: id given to the record of ``path``.
        parent_id: id stored as ``pId`` of that record.
    """
    import posixpath

    from lib.zyqconf import qconf_py

    # Only the last path component is used as the node name.
    name = path if path == "/" else path.rsplit('/', 1)[-1]
    nodes.append({
        "id": current_id,
        "pId": parent_id,
        "name": name,
        "path": path
    })

    children = []
    try:
        children = qconf_py.get_batch_keys(path, cluster_name)
    except qconf_py.Error as exc:
        # ``message`` is a Python 2 exception attribute; fall back to the
        # string form so the check still works when it is absent.
        message = getattr(exc, 'message', str(exc))
        # Work around qconf's get_batch_keys failing for the root path "/":
        # ask ZooKeeper directly instead.
        if message == "Error parameter!":
            zoo_client = ZookeeperService.get_zoo_client(cluster_name)
            children = zoo_client.get_children(path)
        else:
            log.warning('Node does not exists on QConf agent: %s', path)

    for idx, child in enumerate(children):
        # The index is zero-padded to two digits and appended to the parent
        # id, so the children of "1" get the ids "100", "101", "102", ...
        child_id = "{0}{1:02d}".format(current_id, idx)
        # znode paths always use "/", whatever the host OS separator is.
        child_path = posixpath.join(path, str(child))
        get_znode_tree_from_qconf(cluster_name, child_path, nodes, child_id, current_id)
Example #2
0
def get_znode_tree(zoo_client, path, nodes, current_id='1', parent_id='0'):
    """Walk the znodes below ``path`` and append them to ``nodes`` as zTree records.

    Each record is a dict with the keys ``id``, ``pId``, ``name`` and
    ``path``. The record for ``path`` is appended before its children are
    visited (depth-first). A missing node is logged and not descended into.
    """
    # The display name is the last path component; the root keeps "/".
    if path == "/":
        label = path
    else:
        label = path.rsplit('/', 1)[-1]
    nodes.append({
        "id": current_id,
        "pId": parent_id,
        "name": label,
        "path": path
    })

    try:
        child_names = zoo_client.get_children(path)
    except NoNodeError:
        log.warning('Node does not exists on zookeeper: %s', path)
        return

    # The root already ends with "/", every other path needs the separator.
    prefix = path if path == "/" else path + "/"
    for position, child_name in enumerate(child_names):
        # Zero-padded two-digit index appended to the parent id:
        # the children of "1" get the ids "100", "101", "102", ...
        child_id = "{0}{1:02d}".format(current_id, position)
        get_znode_tree(zoo_client, prefix + child_name, nodes, child_id, current_id)
Example #3
0
def get_znode_tree_from_qconf(cluster_name,
                              path,
                              nodes,
                              current_id='1',
                              parent_id='0'):
    """Collect the znode subtree below ``path`` from QConf as flat zTree records.

    The record of ``path`` is appended to ``nodes`` before its children are
    visited, so ``nodes`` ends up in depth-first order.

    Args:
        cluster_name: cluster identifier handed to ``qconf_py.get_batch_keys``
            and, for the fallback, to ``ZookeeperService.get_zoo_client``.
        path: znode path to start from.
        nodes: list extended in place with dicts holding the keys ``id``,
            ``pId``, ``name`` and ``path``.
        current_id: id given to the record of ``path``.
        parent_id: id stored as ``pId`` of that record.
    """
    import posixpath

    from lib.zyqconf import qconf_py

    # Only the last path component is used as the node name.
    name = path if path == "/" else path.rsplit('/', 1)[-1]
    nodes.append({
        "id": current_id,
        "pId": parent_id,
        "name": name,
        "path": path
    })

    children = []
    try:
        children = qconf_py.get_batch_keys(path, cluster_name)
    except qconf_py.Error as exc:
        # ``message`` is a Python 2 exception attribute; fall back to the
        # string form so the check still works when it is absent.
        message = getattr(exc, 'message', str(exc))
        # Work around qconf's get_batch_keys error for the root path "/":
        # read the children straight from ZooKeeper.
        if message == "Error parameter!":
            zoo_client = ZookeeperService.get_zoo_client(cluster_name)
            children = zoo_client.get_children(path)
        else:
            log.warning('Node does not exists on QConf Agent, path: %s', path)

    for idx, child in enumerate(children):
        # znode paths always use "/", whatever the host OS separator is.
        child_path = posixpath.join(path, str(child))
        # If the parent id is "1" its children get "100", "101", "102", ...
        # (the index is left-padded with zeros to two digits).
        child_id = "{0}{1:02d}".format(current_id, idx)
        get_znode_tree_from_qconf(cluster_name, child_path, nodes, child_id,
                                  current_id)
Example #4
0
def log_format(instance, func_name=None, params=None, error_info=None):
    """Write one formatted log line about a failed call on ``instance``.

    Args:
        instance: class instance; the code reads ``request.uri`` and
            ``_start_time`` from it (a Handler in the current business code).
        func_name: name of the method of the class whose call returned empty.
        params: str, parameters to mention in the log; defaults to the
            request URI when empty.
        error_info: error details; when given the line is logged at error
            level instead of warning level.
    """
    if not inspect.isclass(type(instance)):
        return

    module_name = instance.__module__
    class_name = instance.__class__.__name__
    if not params:
        params = instance.request.uri
    # elapsed time since the instance's start stamp, in milliseconds
    spend_time = round((time.time() - instance._start_time) * 1000, 2)

    if error_info:
        log.error('%s.%s faild spend_time:%sms params:(%s) error info:%s',
                  module_name, class_name, spend_time, params, error_info)
    elif func_name:
        log.warning('%s.%s call %s faild spend_time:%sms params:(%s)',
                    module_name, class_name, func_name, spend_time, params)
    else:
        log.warning('%s.%s faild spend_time:%sms params:(%s)', module_name,
                    class_name, spend_time, params)
Example #5
0
    def _exec(self):
        """with退出处理
        """
        try:
            self._args_set()
        except ValueError as e:
            log_format(self, error_info=e)
            msg = "参数错误,请检查参数后再请求! {}".format(e)
            return self.send_obj({"status": 1000, "msg": msg})
        try:
            res = self.response()
        except OperationalError as e:
            # Mysql 连接错误
            log.warning("Mysql OperationalError: %s", e)
            reconnect()
            # 如果连接失败重新加载当前请求页面
            return self.redirect(self.request.uri)
        except Exception:
            import traceback

            e = traceback.format_exc()
            log.error("Internal Error: %s", e)
            return self.send_obj({"status": 10001, "msg": str(e)})
        if not self._finished:
            return self.send_obj(res)
Example #6
0
def get_znode_tree_from_qconf(cluster_name, path, nodes, current_id='1', parent_id='0'):
    """Collect the znode subtree below ``path`` from QConf as flat zTree records.

    The record of ``path`` is appended to ``nodes`` before its children are
    visited, so ``nodes`` ends up in depth-first order.

    Args:
        cluster_name: cluster identifier handed to ``qconf_py.get_batch_keys``
            and, for the fallback, to ``ZookeeperService.get_zoo_client``.
        path: znode path to start from.
        nodes: list extended in place with dicts holding the keys ``id``,
            ``pId``, ``name`` and ``path``.
        current_id: id given to the record of ``path``.
        parent_id: id stored as ``pId`` of that record.
    """
    import posixpath

    from lib.zyqconf import qconf_py

    # Only the last path component is used as the node name.
    name = path if path == "/" else path.rsplit('/', 1)[-1]
    nodes.append({
        "id": current_id,
        "pId": parent_id,
        "name": name,
        "path": path
    })

    children = []
    try:
        children = qconf_py.get_batch_keys(path, cluster_name)
    except qconf_py.Error as exc:
        # ``message`` is a Python 2 exception attribute; fall back to the
        # string form so the check still works when it is absent.
        message = getattr(exc, 'message', str(exc))
        # Work around qconf's get_batch_keys error for the root path "/":
        # read the children straight from ZooKeeper.
        if message == "Error parameter!":
            zoo_client = ZookeeperService.get_zoo_client(cluster_name)
            children = zoo_client.get_children(path)
        else:
            log.warning('Node does not exists on QConf Agent, path: %s', path)

    for idx, child in enumerate(children):
        # znode paths always use "/", whatever the host OS separator is.
        child_path = posixpath.join(path, str(child))
        # If the parent id is "1" its children get "100", "101", "102", ...
        # (the index is left-padded with zeros to two digits).
        child_id = "{0}{1:02d}".format(current_id, idx)
        get_znode_tree_from_qconf(cluster_name, child_path, nodes, child_id, current_id)
Example #7
0
 def close(self, commit=True):
     """Mark the shutdown as successful, optionally commit, then send the close command."""
     # TODO: close through the queue
     self.execute("UPDATE lootnika SET self_status='shutdown successfully'")
     if not commit:
         log.warning("Closing Datastore without commit")
     else:
         self.execute('--commit--')
     self.execute('--close--')
Example #8
0
    def run(self):
        """Watch the registered threads forever and react to failures.

        Each pass calls ``self.resources_usage()`` and ``self.threads_names()``,
        flags every entry of ``self.myThreads`` by its presence in
        ``self.allThreads`` and recomputes ``self.isVerified``. Until all
        threads are up it counts passes (warning at 10, ``self.crash`` at 20);
        once started it calls ``self.crash`` when a thread is missing and
        ``self.exit`` is not set. The loop sleeps ``self.rate`` seconds per pass.
        """
        attempts = 1
        crashed = False  # after a crash only the thread statuses keep being refreshed
        while True:
            self.resources_usage()
            self.threads_names()

            for name in self.myThreads:
                self.myThreads[name] = name in self.allThreads

            # marks which modules have started: one missing thread clears the flag
            self.isVerified = True
            for name in self.myThreads:
                if not self.myThreads[name]:
                    self.isVerified = False

            if not crashed:
                if self.started:
                    # already running: keep watching the modules
                    if not self.isVerified and not self.exit:
                        crashed = self.crash(
                            "One of the modules does not work correctly")
                elif self.isVerified:
                    # every module is up: announce the start once
                    if sys.argv[0].lower().endswith('.exe'):
                        log.info(
                            f"Lootnika started - Executable version: {__version__}_{platform}"
                        )
                    else:
                        log.info(
                            f"Lootnika started - Source version: {__version__}_{platform}"
                        )

                    log.info(
                        f"Welcome to http://localhost:{cfg['rest']['port']}/admin"
                    )
                    ds.execute("UPDATE lootnika SET self_status='working'")
                    self.started = True
                    self.rate = 2  # polling can be less frequent from now on
                else:
                    # still waiting for all modules to start
                    attempts += 1
                    if attempts == 20:  # start-up time limit
                        crashed = self.crash(
                            'One of the modules does not work correctly')
                    elif attempts == 10:
                        log.warning("detected slow Lootnika startup")

            time.sleep(self.rate)
Example #9
0
def shutdown_me(signum, frame, appServerSvc=None):
    """Handle Ctrl-C: stop the modules in the required order and exit.

    ``signum`` and ``frame`` are accepted but unused. When ``appServerSvc``
    is given, its daemon is asked to exit first. The process is then ended
    with ``os._exit(0)``.
    """
    log.warning('Stopping...')

    if appServerSvc:
        daemon = appServerSvc.daemon
        daemon.exit()

    log.info("Shutdown is successful")
    os._exit(0)
Example #10
0
def shutdown_me(signum=1, frame=1):
    """
    Stop the modules in the required order: REST server, Scheduler, Datastore.

    Does nothing when a shutdown is already in progress (``selfControl.exit``
    is set). Otherwise it polls ``selfControl.myThreads`` every 0.3 seconds
    and sends each module its stop command once, moving on to the next module
    when the previous one is no longer flagged as running. ``n`` records the
    last stage whose stop command has been sent.

    ``signum`` and ``frame`` are accepted but unused. At the end
    ``selfControl.stop`` is set and, unless ``stillWork`` is truthy, the
    process is ended with ``os._exit(1)``.
    """
    log.warning(
        f'Lootnika stopping on {cfg["rest"]["host"]}:{cfg["rest"]["port"]}')
    if selfControl.exit:
        return

    selfControl.exit = True
    selfControl.rate = 0.3
    n = 0
    try:
        while True:
            time.sleep(0.3)
            if not bool(selfControl.myThreads):
                break

            if selfControl.myThreads['RestServer']:
                if n < 1:
                    log.debug("Stopping REST server")
                    try:
                        # these listen addresses are reached through the loopback
                        if cfg["rest"]["host"] in ['::1', '0.0.0.0']:
                            host = '127.0.0.1'
                        else:
                            host = cfg["rest"]["host"]

                        cnx = httpClient.HTTPConnection(host,
                                                        cfg["rest"]["port"],
                                                        timeout=12)
                        try:
                            cnx.request(method="GET", url='/a=stop?stop')
                            cnx.getresponse()
                        finally:
                            # release the socket even when the request fails
                            cnx.close()
                    except Exception as err:
                        # best effort: the request is not retried, only recorded
                        log.debug(f"REST stop request failed: {err}")
                    n = 1
                    continue
            elif selfControl.myThreads['Scheduler']:
                if n < 2:
                    log.debug("Stopping Scheduler thread")
                    scheduler.cmd = 'stop'
                    n = 2
            elif selfControl.myThreads['Datastore']:
                if n < 3:
                    log.debug("Stopping Datastore thread")
                    ds.close()
                    n = 3
            else:
                break

    except Exception:
        log.error(f'Shutdown failed: {traceback.format_exc()}')
    finally:
        selfControl.stop = True
        log.info("Lootnika stopped")
        if not stillWork:
            os._exit(1)
Example #11
0
def _get_recursively(zoo_client, path, nodes):
    """get zookeeper nodes recursively
    """
    try:
        data, _ = zoo_client.get(path)
    except NoNodeError as exc:
        log.warning("No node exists in path: %s", path)
    else:
        nodes.append({"path": path, "data": data})
        for child in zoo_client.get_children(path):
            child_path = os.path.join(path, child)
            _get_recursively(zoo_client, child_path, nodes)
Example #12
0
def _get_recursively(zoo_client, path, nodes):
    """get zookeeper nodes recursively
    """
    try:
        data, _ = zoo_client.get(path)
    except NoNodeError as exc:
        log.warning("No node exists in path: %s", path)
    else:
        nodes.append({"path": path, "data": data})
        for child in zoo_client.get_children(path):
            child_path = os.path.join(path, child)
            _get_recursively(zoo_client, child_path, nodes)
Example #13
0
    def _work_manager(self, taskName: str = '', lastTask: str = '', cmd=False):
        """
        Wrapper around the task executor (Picker). Works as a timer that
        postpones the start until the configured time, which is why the
        scheduler status is also checked here.

        NOTE: currently the scheduler checks the start time itself and
        launches the task immediately.

        Runs ``taskName`` when it is given, otherwise every task of
        ``self.taskList``. ``lastTask`` is not used by this method. ``cmd``
        only selects which log messages are written.
        """
        self._update_startTime()
        if self.status not in ('ready', 'wait'):
            log.warning(
                f'Previous task is still running. Next start will be at {self.startTime}'
            )
            return

        self.status = 'work'  # must only run when the status was ready
        if self.taskCycles > 0:
            self.taskCycles -= 1

        # a single task name can only be given together with cmd=True
        if taskName != '':
            self._start_task(taskName)
        else:
            log.info('Start all tasks' if cmd else 'New tasks cycle')

            for name in self.taskList:
                # stop the cycle when it was cancelled; from here on the
                # worker itself keeps track, even when paused
                if self.status == 'cancel':
                    self.curTask = ''
                    self.status = 'ready'
                    return
                self._start_task(name)

        self.curTask = ''
        self.status = 'wait' if self.taskCycles > 0 else 'ready'

        if cmd:
            log.info('All tasks completed')
        elif self.startTime is None:
            log.info('Tasks cycle done. Task replays are over')
        else:
            log.info(f'Tasks cycle done. Left: {self.taskCycles}')
Example #14
0
 def __init__(self, db: str):
     """Set up the datastore state for ``db`` and start the object.

     ``db`` is the database location; the value ``':memory:'`` is flagged in
     ``self.isMemory`` and logged. Ends by calling ``self.start()``
     (presumably the thread start of the base class; verify against the
     class definition).
     """
     super(Datastore, self).__init__()
     log.debug("Starting Datastore thread")
     self.name = 'Datastore'
     self.db = db
     self.requestQ = Queue()
     self.status = {}
     self.isReady = False
     self.isRestored = False
     self.isMemory = db == ':memory:'
     if self.isMemory:
         log.warning('Using inMemory Datastore')
     self.start()
Example #15
0
def log_inspector():
    """Scan the configured log files for known expressions, forever.

    For every entry of ``cfg['tasks']['logTask']`` the whole file is read and
    each template listed under ``tmpl`` is looked up in its text. A hit is
    logged, shown as a toast and, unless the optional event script vetoes it,
    sent as a notification. The loop sleeps ``intervalCheckMin * 2`` between
    passes and ends on any unexpected error outside the per-file handling.
    """
    log.debug("log_inspector started")
    selfName = 'log_inspector'
    while True:
        try:
            for taskName, task in cfg['tasks']['logTask'].items():
                log.info(f"Check log {taskName}")
                logFile = task['file']
                templates = task['tmpl']

                try:
                    # TODO open if file is changed
                    with open(logFile, encoding='utf-8') as f:
                        cnt = f.read()

                    for tmplName in templates:
                        if templater.get_tmpl(selfName, tmplName) not in cnt:
                            continue

                        log.warning(
                            f"Found log expression {taskName}: {tmplName}")
                        filled = templater.tmpl_fill(selfName, 'error')
                        body = filled.replace('{{taskName}}', taskName, -1)
                        event = 'error'

                        new_toast('log_inspector', event)
                        # an event script may rewrite the body or forbid sending
                        allowSend = True
                        if 'eventScript' in task:
                            allowSend, body = execute_event_script(
                                log, task['eventScript'], taskName, event,
                                body)

                        if allowSend:
                            send_notify(taskName, event, body)

                except FileNotFoundError:
                    log.error(f"Not found log file {taskName}")
                except Exception as e:
                    log.error(f"Fail to parse log file {taskName}: {e}")

            sleep(intervalCheckMin * 2)
        except Exception:
            log.critical(str(traceback.format_exc()))
            break
Example #16
0
    def _start_task(self, taskName: str):
        """Run one task through its Picker and log the resulting counters.

        Any exception is caught and logged; with the logger at level 10
        (``logging.DEBUG``) the full traceback is logged instead of the
        exception text.
        """
        self.curTask = taskName
        log.info(f'Start task {taskName}')
        try:
            lg = create_task_logger(taskName, console)
            ts = TaskStore(taskName, lg,
                           self.taskList[taskName]['overwriteTaskstore'])
            taskId = self._mark_task_start(taskName)

            # [total ,seen, new, differ, delete, task error, export error, last doc id]
            self.syncCount[taskId] = [-1, 0, 0, 0, 0, 0, 0, '']
            cf = self.taskList[taskName]

            fc = Factory(taskName, lg, cfg['exporters'][cf['exporter']],
                         self.syncCount[taskId])
            picker = self.Picker(taskId, taskName, cf, lg, ts, fc,
                                 self.syncCount[taskId])
            picker.run()

            # counters as they stand after the run
            stats = self.syncCount[taskId]
            tab = '\n' + '\t' * 5
            lg.info(tab.join([
                "Task done",
                f"Total objects: {stats[0]}",
                f"Seen: {stats[1]}",
                f"New: {stats[2]}",
                f"Differ: {stats[3]}",
                f"Deleted: {stats[4]}",
                f"Task errors: {stats[5]}",
                f"Export errors: {stats[6]}",
            ]))

            if stats[5] != 0:
                lg.warning('Task done with some errors. Check logs')
            if stats[6] != 0:
                log.warning(
                    'Task had errors with sending documents. '
                    f'Documents that were not sent are saved in a folder {picker.factory.failPath}'
                )

            self.check_point(taskId, 'complete')
        except Exception as e:
            reason = traceback.format_exc() if log.level == 10 else e
            log.error(f"Fail with task {taskName}: {reason}")
Example #17
0
def get_znode_tree_from_qconf(cluster_name,
                              path,
                              nodes,
                              current_id='1',
                              parent_id='0'):
    """Collect the znode subtree below ``path`` from QConf as flat zTree records.

    A record for ``path`` itself is appended to ``nodes`` first, then the
    function recurses into every child key reported by QConf (depth-first,
    in the order the keys are returned).

    Args:
        cluster_name: cluster identifier handed to ``qconf_py.get_batch_keys``
            and, for the fallback, to ``ZookeeperService.get_zoo_client``.
        path: znode path to start from, e.g. ``"/"`` or ``"/app/conf"``.
        nodes: list extended in place with dicts holding the keys ``id``,
            ``pId``, ``name`` and ``path``.
        current_id: id given to the record of ``path``.
        parent_id: id stored as ``pId`` of that record.
    """
    import posixpath

    from lib.zyqconf import qconf_py

    # Only the last path component is used as the node name.
    name = path if path == "/" else path.rsplit('/', 1)[-1]
    nodes.append({
        "id": current_id,
        "pId": parent_id,
        "name": name,
        "path": path
    })

    children = []
    try:
        children = qconf_py.get_batch_keys(path, cluster_name)
    except qconf_py.Error as exc:
        # ``message`` is a Python 2 exception attribute; fall back to the
        # string form so the check still works when it is absent.
        message = getattr(exc, 'message', str(exc))
        # Work around qconf's get_batch_keys failing for the root path "/":
        # ask ZooKeeper directly instead.
        if message == "Error parameter!":
            zoo_client = ZookeeperService.get_zoo_client(cluster_name)
            children = zoo_client.get_children(path)
        else:
            log.warning('Node does not exists on QConf agent: %s', path)

    for idx, child in enumerate(children):
        # The index is zero-padded to two digits and appended to the parent
        # id, so the children of "1" get the ids "100", "101", "102", ...
        child_id = "{0}{1:02d}".format(current_id, idx)
        # znode paths always use "/", whatever the host OS separator is.
        child_path = posixpath.join(path, str(child))
        get_znode_tree_from_qconf(cluster_name, child_path, nodes, child_id,
                                  current_id)
Example #18
0
def get_znode_tree(zoo_client, path, nodes, current_id='1', parent_id='0'):
    """Walk the znodes below ``path`` and append them to ``nodes`` as zTree records.

    Args:
        zoo_client: client object providing ``get_children(path)``.
        path: znode path to start from.
        nodes: list extended in place with dicts holding the keys ``id``,
            ``pId``, ``name`` and ``path``; parents come before children.
        current_id: id given to the record of ``path``.
        parent_id: id stored as ``pId`` of that record.

    A path for which ``get_children`` raises ``NoNodeError`` is logged; its
    own record has already been appended by then.
    """
    import posixpath

    # Only the last path component is used as the node name.
    name = path if path == "/" else path.rsplit('/', 1)[-1]
    nodes.append({
        "id": current_id,
        "pId": parent_id,
        "name": name,
        "path": path
    })

    try:
        children = zoo_client.get_children(path)
    except NoNodeError:
        log.warning('Node does not exists on zookeeper: %s', path)
    else:
        for idx, child in enumerate(children):
            # The index is zero-padded to two digits and appended to the
            # parent id, so the children of "1" get "100", "101", "102", ...
            child_id = "{0}{1:02d}".format(current_id, idx)
            # znode paths always use "/", whatever the host OS separator is.
            child_path = posixpath.join(path, child)
            get_znode_tree(zoo_client, child_path, nodes, child_id, current_id)
Example #19
0
 def _exec(self):
     '''with退出处理
     '''
     try:
         self._args_set()
     except ValueError as e:
         log_format(self, error_info=e)
         msg = "参数错误,请检查参数后再请求! {}".format(e)
         return self.send_obj({'status': 1000, 'msg': msg})
     try:
         res = self.response()
     except OperationalError as e:
         # Mysql 连接错误
         log.warning('Mysql OperationalError: %s', e)
         reconnect()
         # 如果连接失败重新加载当前请求页面
         return self.redirect(self.request.uri)
     except Exception:
         import traceback
         e = traceback.format_exc()
         log.error('Internal Error: %s', e)
         return self.send_obj({'status': 10001, 'msg': str(e)})
     if not self._finished:
         return self.send_obj(res)
Example #20
0
def log_format(instance, func_name=None, params=None, error_info=None):
    """Write one formatted log line about a failed call on ``instance``.

    Args:
        instance: class instance; the code reads ``request.uri`` and
            ``_start_time`` from it (a Handler in the current business code).
        func_name: name of the method of the class whose call returned empty.
        params: str, parameters to mention in the log; defaults to the
            request URI when empty.
        error_info: error details; when given the line is logged at error
            level instead of warning level.
    """
    if not inspect.isclass(type(instance)):
        return

    module_name = instance.__module__
    class_name = instance.__class__.__name__
    if not params:
        params = instance.request.uri
    # elapsed time since the instance's start stamp, in milliseconds
    spend_time = round((time.time() - instance._start_time) * 1000, 2)

    if error_info:
        log.error(
            "%s.%s faild spend_time:%sms params:(%s) error info:%s",
            module_name, class_name, spend_time, params, error_info,
        )
    elif func_name:
        log.warning(
            "%s.%s call %s faild spend_time:%sms params:(%s)",
            module_name, class_name, func_name, spend_time, params,
        )
    else:
        log.warning(
            "%s.%s faild spend_time:%sms params:(%s)",
            module_name, class_name, spend_time, params,
        )
Example #21
0
    def _create_db(self) -> sqlite3.Connection:
        """
        Создание локальной бд. Восстановление с диска и флаг принудительной синхронизации
        :return:
        """
        try:
            cnx = sqlite3.connect(self.db)
        except Exception as e:
            log.critical(f"Can't open local datastore {self.db}: {e}")
            raise Exception(e)

        # при старте всегда проверка статуса прошлого завершения работы
        fail = False
        try:
            cur = cnx.cursor()
            cur.execute('SELECT self_status FROM lootnika')
        except Exception:
            if self.db != ':memory:':
                log.warning(f'Creating new tasks journal scheme')
            fail = True

        if fail:
            try:
                cur.executescript("""
                    CREATE TABLE lootnika (self_status VARCHAR);
                    CREATE TABLE tasks (
                        id              INTEGER PRIMARY KEY AUTOINCREMENT
                                        UNIQUE
                                        NOT NULL,
                        name                VARCHAR  NOT NULL,
                        start_time          DATETIME,
                        end_time            DATETIME,
                        status              VARCHAR,
                        count_total         INTEGER  DEFAULT (0),
                        count_seen          INTEGER  DEFAULT (0),
                        count_new           INTEGER  DEFAULT (0),
                        count_differ        INTEGER  DEFAULT (0),
                        count_delete        INTEGER  DEFAULT (0),
                        count_task_error    INTEGER  DEFAULT (0),
                        count_export_error  INTEGER  DEFAULT (0),
                        last_doc_id         VARCHAR);
                    CREATE TRIGGER delete_till_100 INSERT ON tasks WHEN (SELECT count(*) FROM tasks)>100 
                    BEGIN
                        DELETE FROM tasks WHERE tasks.id IN (
                            SELECT id FROM tasks ORDER BY id LIMIT (SELECT count(*) - 100 FROM tasks)
                        );
                    END;
                    CREATE TABLE "sphinxbuilder" (
                        "id"	INTEGER,
                        "owner"	TEXT,
                        "name"	TEXT,
                        "path"	TEXT,
                        "hash"	TEXT,
                        PRIMARY KEY("id")
                    );
                """)
                cnx.commit()
            except Exception as e:
                log.error(
                    f'Unable to create datastore scheme in lootnika_tasks_journal.db: {e}'
                )

        cur.execute('SELECT self_status FROM lootnika')
        rec = cur.fetchone()
        if rec is None:
            cur.execute(
                "INSERT INTO lootnika('self_status') VALUES ('starting lootnika')"
            )
        elif rec and rec[0] != 'shutdown successfully':
            log.warning(
                f'The previous shutdown was unexpected. Last lootnika status: {rec[0]}.'
            )
            self.isRestored = True

        cur.execute("UPDATE lootnika SET self_status='starting lootnika'")
        cnx.commit()
        cur.close()
        return cnx
Example #22
0
def _hash_rst_files(owner, path, files, newRst, oldRst) -> bool:
    """Hash the files of one docs folder into ``newRst[owner]``.

    Returns True when at least one file is unknown to ``oldRst[owner]`` or
    is stored there with a different hash.
    """
    changed = False
    known = oldRst[owner]['rst']
    for file in files:
        rst = f"{path}{file}"
        with open(f"{homeDir}{rst}", encoding='utf-8', mode='r') as cnt:
            hsh = f"{cityhash.CityHash64(cnt.read())}"

        newRst[owner]['rst'][rst] = {'file': file, 'hash': hsh}
        if rst not in known or known[rst]['hash'] != hsh:
            changed = True
    return changed


def _scan_plugin_docs(folder, kind, newRst, oldRst) -> bool:
    """Register the docs of every entry under ``homeDir/<folder>``; True on any change."""
    changed = False
    for plugin in os.listdir(f'{homeDir}{folder}'):
        path = f"{folder}/{plugin}/docs/rst/"
        ls = os.listdir(f"{homeDir}{path}")
        if not ls:
            log.warning(f"No documentation sources found for {plugin}")
            continue

        if plugin not in oldRst:
            log.info(f"Found new {kind} docs: {plugin}")
            oldRst[plugin] = {'path': path, 'type': kind, 'rst': {}}

        newRst[plugin] = {'path': path, 'type': kind, 'rst': {}}
        if _hash_rst_files(plugin, path, ls, newRst, oldRst):
            changed = True
    return changed


def check_rst(ds: Datastore) -> dict:
    """Compare the documentation sources on disk with the hashes stored in ``ds``.

    The stored state is read from the ``sphinxbuilder`` table; the current
    state is built by hashing every file under ``docs/rst/`` of each
    exporter, each picker and of lootnika itself. A warning is logged when
    any file is new or has a different hash.

    Args:
        ds: datastore whose ``select`` method returns the ``sphinxbuilder``
            rows (columns are read by index; presumably id, owner, name,
            path, hash -- verify against the table definition).

    Returns:
        dict mapping an owner name to ``{'path', 'type', 'rst'}``, where
        ``rst`` maps each source path to ``{'file', 'hash'}``.

    Raises:
        Exception: on any failure, with the formatted traceback in the message.
    """
    log.debug("Check documentation sources")
    try:
        rows = ds.select('SELECT * FROM sphinxbuilder', )
        oldRst = {
            'lootnika': {
                'path': "docs/rst/",
                'type': 'lootnika',
                'rst': {}
            }
        }
        for row in rows:
            owner = row[1]
            if owner not in oldRst:
                oldRst[owner] = {'rst': {}}

            oldRst[owner]['rst'][row[3]] = {'file': row[2], 'hash': row[4]}

        newRst = {
            'lootnika': {
                'path': "docs/rst/",
                'type': 'lootnika',
                'rst': {}
            }
        }

        # Each scan runs before ``or`` so that all folders are always visited.
        changed = _scan_plugin_docs('exporters', 'exporter', newRst, oldRst)
        changed = _scan_plugin_docs('pickers', 'picker', newRst, oldRst) or changed

        core = "lootnika"
        path = newRst[core]['path']
        ls = os.listdir(f"{homeDir}{path}")
        changed = _hash_rst_files(core, path, ls, newRst, oldRst) or changed

        if changed:
            log.warning(
                "Found changes in documentations. Start me with <make-doc> key."
            )

        return newRst
    except Exception:
        raise Exception(
            f"Fail check sources for help documentation: {traceback.format_exc()}"
        )
Example #23
0
def process_inspector():
    """Monitor the applications described in ``jobList`` and revive failed ones.

    Runs forever. On every pass, for each job:

    * looks the process up with ``get_pid``;
    * if the process exists and ``job['checkUrl']`` is set, requests
      ``job['url']`` and treats anything but HTTP 200 (or a request error)
      as a failure;
    * restarts the application when the HTTP check failed and ``doRestart``
      is set, or when the process is missing and ``alwaysWork`` is set;
    * sends a notification through ``send_notify`` unless the job's
      ``eventScript`` disallows it.

    Between passes it sleeps ``intervalCheckMin``. Any exception escaping a
    pass is logged as critical and ends the loop.
    """
    def get_pid(exe: str, exePath: str, workDir: str = None) -> int:
        """Return the PID of the first running process that matches the job.

        A process matches when its name equals ``exe`` (compared in lower
        case) and, additionally:

        * if ``workDir`` is given, its working directory equals ``workDir``
          (only the working directory is checked in that case);
        * otherwise, its executable path equals the expected path built
          from ``exePath``.

        Returns ``None`` when no process matches.
        """
        # Expected executable path, built lazily on the first name match and
        # then reused. It must not be rebuilt per candidate: doing so from
        # the already-modified value corrupts it after the first mismatch.
        wantedExe = None

        for p in psutil.process_iter(["name", 'exe', 'cwd']):
            # psutil reports None for attributes it could not read
            # (e.g. access denied); such processes cannot be matched.
            name = p.info['name']
            if name is None or exe != name.lower():
                continue

            if workDir:
                cwd = p.info['cwd']
                if cwd is None:
                    continue

                # Normalise separators first, then guarantee a single
                # trailing slash so the value is comparable with workDir.
                cwd = cwd.replace('\\', '/')
                if not cwd.endswith('/'):
                    cwd = f"{cwd}/"

                if workDir.lower() == cwd.lower():
                    return p.pid
            else:
                if wantedExe is None:
                    if PLATFORM == 'nt':
                        wantedExe = f"{exePath}{exe}"
                    else:
                        # drop the last character of exePath
                        # (presumably a trailing slash - confirm with config)
                        wantedExe = exePath[:-1]
                    wantedExe = wantedExe.lower()

                procExe = p.info['exe']
                if procExe is None:
                    continue

                if wantedExe == procExe.replace('\\', '/').lower():
                    return p.pid

        return None

    def restart(job: dict,
                exePid: int = None,
                killRecursive: bool = False) -> str:
        """Kill the job's process (if ``exePid`` is given) and start it again.

        Reads ``taskName``, ``exe``, ``checkPath``, ``workDir`` and
        ``restartTime`` from the enclosing loop iteration.

        :param job: job description; ``whatStart`` selects between
            ``command``, ``exe`` (``exePath`` + exe + ``exeKey``) and,
            for any other value, starting ``job['service']``.
        :param exePid: PID to kill before starting; skipped when falsy.
        :param killRecursive: passed to ``psutil.Process.children``.
        :return: human readable report, also shown via ``new_toast``.
        """
        data = ""
        # 0 - ok, 2 - failed to kill the old process, 3 - failed to start
        status = 0
        failList[taskName]['attemp'] += 1
        if exePid:
            try:
                # Explicit check instead of ``assert``: asserts are removed
                # when Python runs with -O and the guard would vanish.
                if exePid == os.getpid():
                    raise Exception("won't kill myself")

                parent = psutil.Process(exePid)
                children = parent.children(killRecursive)
                children.append(parent)

                # TODO try soft kill (SIGTERM) before hard
                for p in children:
                    try:
                        p.kill()
                    except psutil.NoSuchProcess:
                        pass

                _, alive = psutil.wait_procs(children, timeout=60)
                if alive:
                    raise Exception(
                        f"Fail to kill process {exe} (PID {exePid})")
            except Exception as e:
                data = f'Fail to restart process {exe}: {e}\n'
                log.error(data)
                status = 2

        if status == 0:
            log.debug(f"Launch application {taskName}")
            whatStart = job['whatStart']

            if whatStart == 'command':
                target = job['command']
            elif whatStart == 'exe':
                target = f"{job['exePath']}{exe} {job['exeKey']}"
            else:
                target = None

            if target:
                log.info(f"Starting {taskName}")
                try:
                    # NOTE(review): target is interpolated into a shell
                    # command line; it is taken from the job configuration.
                    if PLATFORM == 'nt':
                        os.system(f"start cmd /c {target}")
                    else:
                        os.system(f"command {target} &")
                except Exception as e:
                    data = f"Fail to restart application: {exe} ({taskName}): {e}\n"
                    status = 3
            else:
                log.info(f"Starting service {job['service']}")
                try:
                    if PLATFORM == 'nt':
                        win32serviceutil.StartService(job['service'])
                    else:
                        os.system(f"systemctl start {job['service']}")

                except Exception as e:
                    e = traceback.format_exc()
                    log.error(str(e))
                    status = 3
                    data = f"Fail to start service: {job['service']} ({taskName}): {e}\n"

            # check that it has not crashed again
            # TODO measure the time between start and crash
            if status == 0:
                sleep(restartTime)
                if get_pid(exe, checkPath, workDir):
                    data += 'Successfully restarted application'
                    failList[taskName]['isAlive'] = False
                    failList[taskName]['attemp'] -= 1
                    log.info(data)
                else:
                    data += f'Fail to start {taskName}'
                    log.error(data)
            else:
                log.error(data)

        new_toast(taskName, data)
        return data

    sleep(3)
    selfName = "process_inspector"
    # Per-task state. 'isAlive' is set to True when a task fails its HTTP
    # check and back to False on recovery / successful restart (the name
    # reads inverted). 'attemp' counts restart attempts that did not succeed.
    # NOTE(review): keyed by jobList keys but looked up by job['task'] below;
    # assumes both are the same string - confirm.
    failList = {}
    for job in jobList:
        failList[job] = {'isAlive': False, "attemp": 0}

    while True:
        try:
            for job in jobList.values():
                taskName = job['task']
                exe = job['exe'].lower()
                checkPath = job['checkPath']
                exePath = job['exePath']
                workDir = job['workDir']
                doRestart = job['doRestart']
                alwaysWork = job['alwaysWork']
                restartTime = job['restartTime']
                respTime = job['respTime']
                status = 0
                body = ''

                log.info(f'Check app {taskName}')
                exePid = get_pid(exe, checkPath, workDir)

                if exePid and not job['checkUrl']:
                    log.debug(f"{taskName} is fine.")
                elif exePid and job['checkUrl']:
                    log.debug(f"Found {taskName}. Check http status")
                    try:
                        res = requests.get(job['url'], timeout=respTime)
                        if res.status_code != 200:
                            raise Exception(
                                f"Server return status {res.status_code}")

                        log.debug(f"{taskName} is fine.")

                        if not failList[taskName]['isAlive']:
                            # healthy and was healthy before: nothing to report
                            continue
                        else:
                            # recovered after a previously failed check
                            failList[taskName]['isAlive'] = False
                            # NOTE(review): this 'alive' text is stored in
                            # data but body stays empty and the notification
                            # below is sent as 'badAnswer' - confirm intent.
                            data = templater.tmpl_fill(selfName, 'alive')
                    except Exception:
                        status = 1
                        data = f"{taskName} didn't respond or return wrong answer. Trying to restart application\n"
                        new_toast(f'Restarting {taskName}', data)
                        log.warning(data)

                        body = templater.tmpl_fill(selfName,
                                                   "badAnswer").replace(
                                                       "{{taskName}}",
                                                       taskName, -1)
                        failList[taskName]['isAlive'] = True

                    if status != 0 and doRestart:
                        data += restart(job, exePid)
                        body += data

                    # the event script may veto or rewrite the notification
                    if 'eventScript' in job:
                        allowSend, body = execute_event_script(
                            log, job['eventScript'], taskName, 'badAnswer',
                            body)
                    else:
                        allowSend = True

                    if allowSend:
                        send_notify(taskName, 'badAnswer', body)

                elif not exePid and alwaysWork:
                    body = templater.tmpl_fill(selfName, 'notFound').replace(
                        "{{taskName}}", taskName, -1)
                    data = f"Not found required application {taskName}. Trying to restart\n"
                    log.warning(data)
                    new_toast(f'Starting {taskName}', data)

                    data += restart(job, exePid)
                    body += data

                    new_toast('log_inspector', 'notFound')
                    # the event script may veto or rewrite the notification
                    if 'eventScript' in job:
                        allowSend, body = execute_event_script(
                            log, job['eventScript'], taskName, 'notFound',
                            body)
                    else:
                        allowSend = True

                    if allowSend:
                        send_notify(taskName, 'notFound', body)

            sleep(intervalCheckMin)
        except Exception:
            # any unexpected error stops the inspector after logging it
            e = traceback.format_exc()
            log.critical(str(e))
            break
Example #24
0
def disk_inspector():
    """Periodically check free disk space for every configured disk task.

    Runs forever. On every pass, for each entry of
    ``cfg['tasks']['diskTask']`` the free space of ``task['diskUsage']`` is
    measured in GiB (bytes / 1024**3, rounded to 2 digits) and compared with
    the task thresholds:

    * below ``critFree``  - error log, toast and ``critFree`` notification;
    * below ``diskWarn``  - warning log, toast and ``diskWarn`` notification;
    * otherwise           - info log only.

    A missing path is logged as an error; any other exception is logged as
    critical and ``shutdown_me(9, 9)`` is called. Between passes the function
    sleeps ``intervalCheckMin``.
    """
    def fill_tmpl(event: str) -> str:
        """Render the template for ``event`` with the current task values."""
        body = templater.tmpl_fill(selfName, event)
        body = body.replace('{{critFree}}', str(critFree))
        body = body.replace('{{diskFree}}', str(diskFree))
        body = body.replace('{{diskUsage}}', diskUsage)
        body = body.replace('{{taskName}}', taskName)
        return body.replace('{{diskWarn}}', str(diskWarn))

    def _notify(event: str, toast: str) -> None:
        """Show a toast and send the ``event`` notification for the task.

        When the task defines ``eventScript`` it is executed first; it
        returns whether sending is allowed and the (possibly changed) body.
        """
        body = fill_tmpl(event)
        new_toast(diskUsage, toast)

        if 'eventScript' in task:
            allowSend, body = execute_event_script(
                log, task['eventScript'], taskName, event, body)
        else:
            allowSend = True

        if allowSend:
            send_notify(taskName, event, body)

    log.debug("disk_inspector started")
    selfName = 'disk_inspector'
    bytesPerGiB = 1024 ** 3

    while True:
        for taskName, task in cfg['tasks']['diskTask'].items():
            critFree = task['critFree']
            diskUsage = task['diskUsage']
            diskWarn = task['diskWarn']

            try:
                diskFree = round(
                    shutil.disk_usage(diskUsage).free / bytesPerGiB, 2)

                if diskFree < critFree:
                    log.error(
                        f"Free disk space is critically small on {diskUsage}: {diskFree}"
                    )
                    _notify(
                        'critFree',
                        f"Free disk space is critically small: {diskFree}")
                elif diskFree < diskWarn:
                    log.warning(
                        f"Free disk space is ends {diskUsage}: {diskFree}GB")
                    _notify('diskWarn',
                            f"Free disk space is ends: {diskFree}GB")
                else:
                    # also covers diskFree == diskWarn, which previously
                    # produced no log line at all
                    log.info(f"disk {diskUsage}: {diskFree}GB free")

            except FileNotFoundError:
                log.error(f'disk_inspector: wrong path: {diskUsage}')
            except Exception:
                log.critical(f'disk_inspector: {traceback.format_exc()}')
                shutdown_me(9, 9)
        sleep(intervalCheckMin)