Exemple #1
0
    def crash(self, s: str) -> bool:
        """Log a fatal error and start the shutdown routine in a new thread.

        The message is written at critical level, then every key of
        ``self.myThreads`` whose value is falsy is reported as a dead core
        thread. Finally ``shutdown_me(1, '')`` is launched in a thread named
        "shutdown"; this method does not wait for it.

        :param s: text of the fatal error
        :return: always True
        """
        # TODO write in datastore error message
        log.critical(s)
        for threadName in self.myThreads:
            if self.myThreads[threadName]:
                continue
            log.critical(f"Core thread {threadName} is dead")

        log.debug('Calling shutdown')
        shutdownThread = Thread(name="shutdown",
                                target=shutdown_me,
                                args=(1, ''))
        shutdownThread.start()
        return True
Exemple #2
0
def log_inspector():
    """Endlessly scan the configured log files for known expressions.

    For every task in ``cfg['tasks']['logTask']`` the whole file is read and
    each template named in the task is looked up as a substring. A hit
    produces a warning, a toast and (unless the task's event script vetoes
    it) an 'error' notification. Problems with a single file are logged and
    the scan moves on; any other exception is logged with its traceback and
    ends the loop.
    """
    inspector = 'log_inspector'

    def report(task_name, task, tmpl_name):
        # One template hit: warn, toast, optionally consult the event script,
        # then notify.
        log.warning(f"Found log expression {task_name}: {tmpl_name}")
        event = 'error'
        filled = templater.tmpl_fill(inspector, event)
        body = filled.replace('{{taskName}}', task_name)

        new_toast('log_inspector', event)
        if 'eventScript' not in task:
            allowed = True
        else:
            allowed, body = execute_event_script(log, task['eventScript'],
                                                 task_name, event, body)

        if allowed:
            send_notify(task_name, event, body)

    def scan(task_name, task, path, tmpl_names):
        # TODO open if file is changed
        with open(path, encoding='utf-8') as fh:
            content = fh.read()

        for tmpl_name in tmpl_names:
            if templater.get_tmpl(inspector, tmpl_name) in content:
                report(task_name, task, tmpl_name)

    log.debug("log_inspector started")
    while True:
        try:
            for task_name, task in cfg['tasks']['logTask'].items():
                log.info(f"Check log {task_name}")
                path = task['file']
                tmpl_names = task['tmpl']

                try:
                    scan(task_name, task, path, tmpl_names)
                except FileNotFoundError:
                    log.error(f"Not found log file {task_name}")
                except Exception as err:
                    log.error(f"Fail to parse log file {task_name}: {err}")

            # log files are polled at half the rate of the base interval
            sleep(intervalCheckMin * 2)
        except Exception:
            log.critical(str(traceback.format_exc()))
            return
Exemple #3
0
def process_inspector():
    """Endlessly watch the configured applications and restart/notify on failure.

    Every ``intervalCheckMin`` seconds each job in ``jobList`` is checked:

    * process found, no URL check configured: nothing to do;
    * process found, URL check configured: ``job['url']`` must answer with
      HTTP 200 within ``job['respTime']``, otherwise the application is
      optionally restarted and a 'badAnswer' notification is sent;
    * process not found and ``job['alwaysWork']`` is set: the application is
      started and a 'notFound' notification is sent.

    Any exception that escapes a check is logged with its traceback and ends
    the loop.
    """
    def get_pid(exe: str, exePath: str, workDir: str = None) -> int:
        """Return the PID of the first process named *exe* that matches.

        If *workDir* is given only the process working directory is compared
        with it; otherwise the process executable path is compared with a
        path derived from *exePath*. Comparison is case-insensitive with
        backslashes treated as forward slashes.

        :param exe: lower-cased process name to look for
        :param exePath: directory of the executable, with a trailing slash
        :param workDir: working directory to match, with a trailing slash
        :return: PID of the match, or None when nothing matches
        """
        for p in psutil.process_iter(["name", 'exe', 'cwd']):
            # psutil stores None for attributes it could not read (for
            # example access denied); such processes cannot be matched.
            name = p.info['name']
            if name is None or exe != name.lower():
                continue

            if workDir:
                cwd = p.info['cwd']
                if cwd is None:
                    continue
                if not cwd.endswith('/'):
                    cwd = f"{cwd}/"

                if workDir.lower() == cwd.replace('\\', '/').lower():
                    return p.pid
            else:
                procExe = p.info['exe']
                if procExe is None:
                    continue

                # Build the expected path in a local: the parameter must stay
                # intact so that every candidate is compared with the same
                # value, not one that grows/shrinks on each iteration.
                if PLATFORM == 'nt':
                    wanted = f"{exePath}{exe}"
                else:
                    wanted = exePath[:-1]

                if wanted.lower() == procExe.replace('\\', '/').lower():
                    return p.pid

        return None

    def restart(job: dict,
                exePid: int = None,
                killRecursive: bool = False) -> str:
        """Kill the application if it is running, start it again and report.

        Reads ``taskName``, ``exe``, ``checkPath``, ``workDir``,
        ``restartTime`` and ``failList`` from the enclosing scope, so it is
        only meaningful while the main loop is processing that job.

        :param job: job description (whatStart, command, exePath, exeKey,
            service)
        :param exePid: PID to kill first; falsy to just start the application
        :param killRecursive: passed to ``psutil.Process.children``
        :return: human-readable result text, also shown as a toast
        """
        data = ""
        status = 0
        failList[taskName]['attemp'] += 1
        if exePid:
            try:
                # explicit check instead of assert: asserts vanish under -O
                if exePid == os.getpid():
                    raise RuntimeError("won't kill myself")
                parent = psutil.Process(exePid)
                children = parent.children(killRecursive)
                children.append(parent)

                # TODO try soft kill before hard
                for p in children:
                    try:
                        # p.send_signal(signal.SIGTERM)
                        p.kill()
                    except psutil.NoSuchProcess:
                        pass

                _, alive = psutil.wait_procs(children, timeout=60)
                if alive:
                    raise Exception(
                        f"Fail to kill process {exe} (PID {exePid})")
            except Exception as e:
                data = f'Fail to restart process {exe}: {e}\n'
                log.error(data)
                status = 2

        if status == 0:
            log.debug(f"Launch application {taskName}")
            whatStart = job['whatStart']

            if whatStart == 'command':
                target = job['command']
            elif whatStart == 'exe':
                target = f"{job['exePath']}{exe} {job['exeKey']}"
            else:
                # anything else is started as a service
                target = None

            if target:
                log.info(f"Starting {taskName}")
                try:
                    if PLATFORM == 'nt':
                        os.system(f"start cmd /c {target}")
                    else:
                        os.system(f"command {target} &")
                except Exception as e:
                    data = f"Fail to restart application: {exe} ({taskName}): {e}\n"
                    status = 3
            else:
                log.info(f"Starting service {job['service']}")
                try:
                    if PLATFORM == 'nt':
                        win32serviceutil.StartService(job['service'])
                    else:
                        os.system(f"systemctl start {job['service']}")

                except Exception as e:
                    e = traceback.format_exc()
                    log.error(str(e))
                    status = 3
                    data = f"Fail to start service: {job['service']} ({taskName}): {e}\n"

            # check that it has not crashed again
            # TODO measure the time between start and crash
            if status == 0:
                sleep(restartTime)
                if get_pid(exe, checkPath, workDir):
                    data += 'Successfully restarted application'
                    failList[taskName]['isAlive'] = False
                    # a successful restart does not count as a failed attempt
                    failList[taskName]['attemp'] -= 1
                    log.info(data)
                else:
                    data += f'Fail to start {taskName}'
                    log.error(data)
            else:
                log.error(data)

        new_toast(taskName, data)
        return data

    sleep(3)
    selfName = "process_inspector"
    # Per-job failure state. Despite its name, 'isAlive' is set to True when
    # a URL check fails and back to False once the application is fine.
    # presumably the keys of jobList equal each job's 'task' value; verify
    failList = {}
    for job in jobList:
        failList[job] = {'isAlive': False, "attemp": 0}

    while True:
        try:
            for job in jobList.values():
                taskName = job['task']
                exe = job['exe'].lower()
                checkPath = job['checkPath']
                exePath = job['exePath']
                workDir = job['workDir']
                doRestart = job['doRestart']
                alwaysWork = job['alwaysWork']
                restartTime = job['restartTime']
                respTime = job['respTime']
                status = 0
                body = ''

                log.info(f'Check app {taskName}')
                exePid = get_pid(exe, checkPath, workDir)

                if exePid and not job['checkUrl']:
                    log.debug(f"{taskName} is fine.")
                elif exePid and job['checkUrl']:
                    log.debug(f"Found {taskName}. Check http status")
                    try:
                        res = requests.get(job['url'], timeout=respTime)
                        if res.status_code != 200:
                            raise Exception(
                                f"Server return status {res.status_code}")

                        log.debug(f"{taskName} is fine.")

                        if not failList[taskName]['isAlive']:
                            continue
                        else:
                            # NOTE(review): on recovery the 'alive' text goes
                            # to data only; body stays empty and the event
                            # sent below is still 'badAnswer' - confirm intent
                            failList[taskName]['isAlive'] = False
                            data = templater.tmpl_fill(selfName, 'alive')
                    except Exception:
                        status = 1
                        data = f"{taskName} didn't respond or return wrong answer. Trying to restart application\n"
                        new_toast(f'Restarting {taskName}', data)
                        log.warning(data)

                        body = templater.tmpl_fill(selfName,
                                                   "badAnswer").replace(
                                                       "{{taskName}}",
                                                       taskName)
                        failList[taskName]['isAlive'] = True

                    if status != 0 and doRestart:
                        data += restart(job, exePid)
                        body += data

                    if 'eventScript' in job:
                        allowSend, body = execute_event_script(
                            log, job['eventScript'], taskName, 'badAnswer',
                            body)
                    else:
                        allowSend = True

                    if allowSend:
                        send_notify(taskName, 'badAnswer', body)

                elif not exePid and alwaysWork:
                    body = templater.tmpl_fill(selfName, 'notFound').replace(
                        "{{taskName}}", taskName)
                    data = f"Not found required application {taskName}. Trying to restart\n"
                    log.warning(data)
                    new_toast(f'Starting {taskName}', data)

                    data += restart(job, exePid)
                    body += data

                    # NOTE(review): toast title 'log_inspector' looks copied
                    # from another inspector - confirm before changing
                    new_toast('log_inspector', 'notFound')
                    if 'eventScript' in job:
                        allowSend, body = execute_event_script(
                            log, job['eventScript'], taskName, 'notFound',
                            body)
                    else:
                        allowSend = True

                    if allowSend:
                        send_notify(taskName, 'notFound', body)

            sleep(intervalCheckMin)
        except Exception:
            e = traceback.format_exc()
            log.critical(str(e))
            break
Exemple #4
0
def disk_inspector():
    """Endlessly check free space of the configured disks and raise alerts.

    For each task in ``cfg['tasks']['diskTask']`` the free space of
    ``task['diskUsage']`` is measured in units of 1073741824 bytes, rounded
    to two decimals. Below ``critFree`` a 'critFree' event is raised, below
    ``diskWarn`` a 'diskWarn' event; above ``diskWarn`` the value is only
    logged. A missing path is logged and skipped; any other error during a
    check is logged with its traceback and followed by ``shutdown_me(9, 9)``.
    """
    def fill_tmpl(event: str) -> str:
        """Render the *event* template with the figures of the current task."""
        text = templater.tmpl_fill(inspector, event)
        substitutions = (
            ('{{critFree}}', str(crit_limit)),
            ('{{diskFree}}', str(free_space)),
            ('{{diskUsage}}', disk_path),
            ('{{taskName}}', task_name),
            ('{{diskWarn}}', str(warn_limit)),
        )
        for placeholder, value in substitutions:
            text = text.replace(placeholder, value)
        return text

    log.debug("disk_inspector started")
    inspector = 'disk_inspector'

    while True:
        for task_name, task in cfg['tasks']['diskTask'].items():
            crit_limit = task['critFree']
            disk_path = task['diskUsage']
            warn_limit = task['diskWarn']

            try:
                free_bytes = shutil.disk_usage(disk_path).free
                free_space = round(free_bytes / 1073741824, 2)

                if free_space < crit_limit:
                    log.error(
                        f"Free disk space is critically small on {disk_path}: {free_space}"
                    )
                    event = 'critFree'
                    toast = f"Free disk space is critically small: {free_space}"
                elif free_space < warn_limit:
                    log.warning(
                        f"Free disk space is ends {disk_path}: {free_space}GB")
                    event = 'diskWarn'
                    toast = f"Free disk space is ends: {free_space}GB"
                else:
                    # exactly at the warning limit nothing is logged
                    if free_space > warn_limit:
                        log.info(f"disk {disk_path}: {free_space}GB free")
                    continue

                # shared tail of both alert levels
                body = fill_tmpl(event)
                new_toast(disk_path, toast)
                if 'eventScript' not in task:
                    allowed = True
                else:
                    allowed, body = execute_event_script(
                        log, task['eventScript'], task_name, event, body)

                if allowed:
                    send_notify(task_name, event, body)

            except FileNotFoundError:
                log.error(f'disk_inspector: wrong path: {disk_path}')
            except Exception:
                log.critical(f'disk_inspector: {traceback.format_exc()}')
                shutdown_me(9, 9)
        sleep(intervalCheckMin)
Exemple #5
0
    def _create_db(self) -> sqlite3.Connection:
        """
        Создание локальной бд. Восстановление с диска и флаг принудительной синхронизации
        :return:
        """
        try:
            cnx = sqlite3.connect(self.db)
        except Exception as e:
            log.critical(f"Can't open local datastore {self.db}: {e}")
            raise Exception(e)

        # при старте всегда проверка статуса прошлого завершения работы
        fail = False
        try:
            cur = cnx.cursor()
            cur.execute('SELECT self_status FROM lootnika')
        except Exception:
            if self.db != ':memory:':
                log.warning(f'Creating new tasks journal scheme')
            fail = True

        if fail:
            try:
                cur.executescript("""
                    CREATE TABLE lootnika (self_status VARCHAR);
                    CREATE TABLE tasks (
                        id              INTEGER PRIMARY KEY AUTOINCREMENT
                                        UNIQUE
                                        NOT NULL,
                        name                VARCHAR  NOT NULL,
                        start_time          DATETIME,
                        end_time            DATETIME,
                        status              VARCHAR,
                        count_total         INTEGER  DEFAULT (0),
                        count_seen          INTEGER  DEFAULT (0),
                        count_new           INTEGER  DEFAULT (0),
                        count_differ        INTEGER  DEFAULT (0),
                        count_delete        INTEGER  DEFAULT (0),
                        count_task_error    INTEGER  DEFAULT (0),
                        count_export_error  INTEGER  DEFAULT (0),
                        last_doc_id         VARCHAR);
                    CREATE TRIGGER delete_till_100 INSERT ON tasks WHEN (SELECT count(*) FROM tasks)>100 
                    BEGIN
                        DELETE FROM tasks WHERE tasks.id IN (
                            SELECT id FROM tasks ORDER BY id LIMIT (SELECT count(*) - 100 FROM tasks)
                        );
                    END;
                    CREATE TABLE "sphinxbuilder" (
                        "id"	INTEGER,
                        "owner"	TEXT,
                        "name"	TEXT,
                        "path"	TEXT,
                        "hash"	TEXT,
                        PRIMARY KEY("id")
                    );
                """)
                cnx.commit()
            except Exception as e:
                log.error(
                    f'Unable to create datastore scheme in lootnika_tasks_journal.db: {e}'
                )

        cur.execute('SELECT self_status FROM lootnika')
        rec = cur.fetchone()
        if rec is None:
            cur.execute(
                "INSERT INTO lootnika('self_status') VALUES ('starting lootnika')"
            )
        elif rec and rec[0] != 'shutdown successfully':
            log.warning(
                f'The previous shutdown was unexpected. Last lootnika status: {rec[0]}.'
            )
            self.isRestored = True

        cur.execute("UPDATE lootnika SET self_status='starting lootnika'")
        cnx.commit()
        cur.close()
        return cnx