Esempio n. 1
0
def unregi_url(url):
    """Remove a URL from the registration table.

    The URL is looked up in ``DB.REGI_SYS_TBL``; when at least one row
    matches, the row(s) are deleted through ``DB.sql_execute``. A URL that is
    not registered is reported as a success.

    Args:
        url: URL string to unregister.

    Returns:
        ``{'Result': 'SUCCESS'}`` when the URL is absent or the delete
        succeeded, ``{'Result': 'FAIL'}`` when the delete failed or any
        error was raised.
    """
    try:
        # The URL is supplied by the caller. Double every single quote before
        # embedding it in a SQL string literal so that a quote inside the
        # value cannot terminate the literal and change the statement.
        # NOTE(review): bound parameters would be preferable if
        # DB.sql_execute can accept them -- confirm against the DB module.
        quoted_url = "'" + url.replace("'", "''") + "'"

        sql = 'SELECT * FROM ' + DB.REGI_SYS_TBL + ' WHERE url = ' + quoted_url

        conn = None
        try:
            with DB.connection() as conn:
                url_info = conn.cursor().execute(sql).fetchall()
        finally:
            # Close the connection even when the query raises.
            if conn is not None:
                conn.close()

        # Nothing registered under this URL: nothing to delete.
        if not url_info:
            return {'Result': 'SUCCESS'}

        sql = 'DELETE FROM ' + DB.REGI_SYS_TBL + ' WHERE url = ' + quoted_url

        if DB.sql_execute(sql) == 'SUCCESS':
            return {'Result': 'SUCCESS'}
        return {'Result': 'FAIL'}
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
Esempio n. 2
0
def proc_dis_system(node, dummy):
    """Collect the status columns of the nodes of every checked system type.

    For each type listed under ``check_system`` in the watchdog
    configuration, the status table is joined with the node-info table and
    every returned row becomes a dict holding the type, the IP address and
    one entry per item returned by ``DB.get_event_list``.

    Args:
        node: node name to restrict the query to, or ``'all'``.
        dummy: unused.

    Returns:
        Dict keyed by node name, or ``{'Result': 'FAIL'}`` on any error.
    """
    try:
        result = {}

        for sys_type in CONF.watchdog()['check_system']:
            event_list = DB.get_event_list(sys_type)

            # NOTE: the statement is assembled by plain string concatenation.
            columns = DB.STATUS_TBL + ".nodename, " + DB.NODE_INFO_TBL + ".ip_addr, " + ", ".join(event_list)
            join_on = DB.STATUS_TBL + ".nodename = " + DB.NODE_INFO_TBL + ".nodename"
            sql = "SELECT " + columns + " FROM " + DB.STATUS_TBL + \
                  " INNER JOIN " + DB.NODE_INFO_TBL + " ON " + join_on + \
                  " WHERE type = '" + sys_type + "'"

            if node != 'all':
                sql += " and " + DB.NODE_INFO_TBL + ".nodename = '" + node + "'"

            with DB.connection() as conn:
                rows = conn.cursor().execute(sql).fetchall()
            conn.close()

            for row in rows:
                # Column 0 is the node name, column 1 the IP address; the
                # event items follow in the order they were selected.
                entry = {'TYPE': sys_type, 'IP': row[1]}
                for col, item in enumerate(event_list, 2):
                    entry[item] = row[col]

                result[row[0]] = entry

        return result
    except:
        LOG.exception()
        return {'Result': 'FAIL'}
Esempio n. 3
0
def regi_url(url, auth):
    """Store a URL / authorization pair in the registration table.

    Args:
        url: URL string to register.
        auth: authorization string stored together with the URL.

    Returns:
        ``{'Result': 'SUCCESS'}`` when the URL is already registered or the
        insert succeeded, ``{'Result': 'FAIL'}`` when the insert failed or
        any error was raised.
    """
    try:
        # Caller-supplied values: double embedded single quotes so they
        # cannot terminate the SQL string literal early.
        # NOTE(review): bound parameters would be preferable if
        # DB.sql_execute can accept them -- confirm against the DB module.
        quoted_url = "'" + url.replace("'", "''") + "'"

        sql = 'SELECT * FROM ' + DB.REGI_SYS_TBL + ' WHERE url = ' + quoted_url

        conn = None
        try:
            with DB.connection() as conn:
                url_info = conn.cursor().execute(sql).fetchall()
        finally:
            # Close the connection even when the query raises.
            if conn is not None:
                conn.close()

        # Already registered (one or more rows): do not insert another copy.
        if url_info:
            return {'Result': 'SUCCESS'}

        quoted_auth = "'" + auth.replace("'", "''") + "'"
        sql = 'INSERT INTO ' + DB.REGI_SYS_TBL + \
              ' VALUES (' + quoted_url + ', ' + quoted_auth + ' )'

        if DB.sql_execute(sql) == 'SUCCESS':
            return {'Result': 'SUCCESS'}
        return {'Result': 'FAIL'}
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
Esempio n. 4
0
def get_event_list(url, auth):
    """Return every row of the event table as a list of event dicts.

    Args:
        url: unused.
        auth: unused.

    Returns:
        ``{'Result': 'SUCCESS', 'Event list': [...]}`` (the list is empty
        when the event table has no rows), or ``{'Result': 'FAIL'}`` when any
        error was raised.
    """
    try:
        sql_evt = 'SELECT * FROM ' + DB.EVENT_TBL

        conn = None
        try:
            with DB.connection() as conn:
                evt_list = conn.cursor().execute(sql_evt).fetchall()
        finally:
            # Close the connection even when the query raises.
            if conn is not None:
                conn.close()

        event_list = []

        for nodename, item, grade, pre_grade, _reason, evt_time in evt_list:
            event_list.append({
                'event': 'occur',
                'system': nodename,
                'item': item,
                'grade': grade,
                'pre_grade': pre_grade,
                # NOTE(review): the constant string is reported here, not the
                # reason column read from the row -- confirm this is intended.
                'reason': 'fail_reason',
                'time': evt_time
            })

        # Built after the loop so an empty event table still yields SUCCESS
        # with an empty list instead of an unbound result.
        return {'Result': 'SUCCESS', 'Event list': event_list}
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
Esempio n. 5
0
def push_event(node_name, item, grade, desc, time):
    """POST an 'occur' event as JSON to every registered URL.

    The (url, auth) pairs are read from ``DB.REGI_SYS_TBL``; each URL gets
    the same JSON body with the stored auth value as ``Authorization``
    header. A failing POST is logged and the remaining URLs are still tried.

    Args:
        node_name: reported as ``system``.
        item: reported as ``item``.
        grade: reported as ``grade``.
        desc: reported as ``desc``.
        time: reported as ``time``.
    """
    query = "SELECT * FROM " + DB.REGI_SYS_TBL

    with DB.connection() as conn:
        subscribers = conn.cursor().execute(query).fetchall()
    conn.close()

    for url, auth in subscribers:
        headers = {'Content-Type': 'application/json', 'Authorization': auth}
        payload = json.dumps({
            'event': 'occur',
            'system': node_name,
            'item': item,
            'grade': grade,
            'desc': desc,
            'time': time
        })

        try:
            requests.post(url, headers=headers, data=payload, timeout=2)
        except:
            # e.g. the REST call timed out
            LOG.exception()
Esempio n. 6
0
def xos_status_check(conn, db_log, node_name):
    """Query the XOS REST server with curl and record each backend status.

    Every element of the returned JSON array is classified as 'ok' when the
    part of ``backend_status`` before the first '-' is '0', otherwise 'nok'.
    After each element the accumulated list is written to ``DB.XOS_TBL`` for
    ``node_name``.

    Args:
        conn: DB connection handed to ``DB.sql_execute``.
        db_log: logger exposing ``write_log`` for SQL traces.
        node_name: node whose row is updated.

    Returns:
        ``(status, fail_reason)`` where status is 'ok', 'nok' or 'fail' and
        fail_reason lists the 'nok' entries; ``('fail', None)`` when the curl
        command exits with a non-zero code.
    """
    xos_status = 'ok'
    xos_list = []
    fail_reason = []

    try:
        url = CONF.xos()['xos_rest_server']
        account = CONF.xos()['xos_rest_account']

        cmd = ''.join(['curl -H "Accept: application/json; indent=4" -u ',
                       account, ' -X GET ', url, '/api/core/xoses/'])
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = proc.communicate()

        if proc.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return 'fail', None

        for xos_info in json.loads(output):
            backend_status = xos_info['backend_status']

            LOG.info('xos_status_backend_status = ' + backend_status)

            # "<code> - <description>": code '0' means healthy.
            parts = str(backend_status).split('-')
            status = 'ok' if parts[0].strip() == '0' else 'nok'

            entry = {
                'name': xos_info['name'],
                'status': status,
                'description': parts[1].strip()
            }
            xos_list.append(entry)

            if status == 'nok':
                xos_status = 'nok'
                fail_reason.append(entry)

            # The table is refreshed after every element with the list
            # gathered so far.
            try:
                sql = "UPDATE " + DB.XOS_TBL + \
                      ' SET xos_status = "' + str(xos_list) + '"' + \
                      " WHERE nodename = '" + node_name + "'"
                db_log.write_log('----- UPDATE XOS STATUS INFO -----\n' + sql)

                if DB.sql_execute(sql, conn) != 'SUCCESS':
                    db_log.write_log('[FAIL] XOS STATUS DB Update Fail.')
            except:
                LOG.exception()

    except:
        LOG.exception()
        xos_status = 'fail'

    return xos_status, fail_reason
Esempio n. 7
0
def regi_url(url, auth):
    """Register a URL / authorization pair and return the active events.

    Rows of the event table whose grade is neither 'ok' nor 'normal' are
    returned as the event list together with a successful result.

    Args:
        url: URL string to register.
        auth: authorization string stored together with the URL.

    Returns:
        ``{'Result': 'SUCCESS', 'Event list': [...]}`` when the URL is
        already registered or the insert succeeded, ``{'Result': 'FAIL'}``
        when the insert failed or any error was raised.
    """
    try:
        # Caller-supplied values: double embedded single quotes so they
        # cannot terminate the SQL string literal early.
        # NOTE(review): bound parameters would be preferable if
        # DB.sql_execute can accept them -- confirm against the DB module.
        quoted_url = "'" + url.replace("'", "''") + "'"

        sql = 'SELECT * FROM ' + DB.REGI_SYS_TBL + ' WHERE url = ' + quoted_url
        sql_evt = 'SELECT * FROM ' + DB.EVENT_TBL

        conn = None
        try:
            with DB.connection() as conn:
                url_info = conn.cursor().execute(sql).fetchall()
                evt_list = conn.cursor().execute(sql_evt).fetchall()
        finally:
            # Close the connection even when a query raises.
            if conn is not None:
                conn.close()

        event_list = []

        for nodename, item, grade, desc, evt_time in evt_list:
            if grade not in ('ok', 'normal'):
                event_list.append({
                    'event': 'occur',
                    'system': nodename,
                    'item': item,
                    'grade': grade,
                    'desc': desc,
                    'time': evt_time
                })

        success = {'Result': 'SUCCESS', 'Event list': event_list}

        # Already registered (one or more rows): do not insert another copy.
        if url_info:
            return success

        quoted_auth = "'" + auth.replace("'", "''") + "'"
        sql = 'INSERT INTO ' + DB.REGI_SYS_TBL + \
              ' VALUES (' + quoted_url + ', ' + quoted_auth + ' )'

        if DB.sql_execute(sql) == 'SUCCESS':
            return success
        return {'Result': 'FAIL'}
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
Esempio n. 8
0
def onos_ha_check(conn, db_log):
    """Fetch the HA proxy statistics with curl and store them per proxy name.

    The command output is parsed as CSV (after stripping a leading '# ').
    Rows whose ``pxname`` is 'stats' or whose ``svname`` is 'BACKEND' are
    skipped; the others are grouped by ``pxname``. The grouped dict is
    written as a string to ``DB.HA_TBL``.

    Args:
        conn: DB connection handed to ``DB.sql_execute``.
        db_log: logger exposing ``write_log`` for SQL traces.

    Returns:
        Dict mapping proxy name to a list of server detail dicts, or ``None``
        when the command fails or any error is raised.
    """
    try:
        stats_url = CONF.ha()['ha_proxy_server']
        account = CONF.ha()['ha_proxy_account']

        cmd = "curl --user " + account + \
              " --header 'Accept: text/html, application/xhtml+xml, image/jxr, */*' " + \
              '"' + stats_url + '"'
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = proc.communicate()

        if proc.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return None

        report_data = csv.DictReader(output.lstrip('# ').splitlines())

        dic_stat = {}
        for row in report_data:
            is_stats_proxy = row['pxname'].strip() == 'stats'
            if is_stats_proxy or row['svname'].strip() == 'BACKEND':
                continue

            detail = {
                'name': row['svname'],
                'req_count': row['stot'],
                'succ_count': row['hrsp_2xx'],
                'node_sts': row['status']
            }

            # Group the server entries under their proxy name.
            dic_stat.setdefault(row['pxname'], []).append(detail)

        try:
            sql = "UPDATE " + DB.HA_TBL + \
                  ' SET stats = "' + str(dic_stat) + '"' + \
                  ' WHERE ha_key = "' + 'HA' + '"'
            db_log.write_log('----- UPDATE HA INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] HA DB Update Fail.')
        except:
            LOG.exception()

        return dic_stat
    except:
        LOG.exception()
        return None
Esempio n. 9
0
def push_event(node_name, item, grade, pre_grade, reason, time, flush_alarm):
    """Record a grade change, push it to registered URLs and queue an alarm.

    The change is written to ``history_log``, POSTed as JSON to every
    (url, auth) pair found in ``DB.REGI_SYS_TBL`` and finally queued through
    ``ALARM.queue_alarm``.

    Args:
        node_name: reported as ``system``.
        item: monitored item name.
        grade: new grade.
        pre_grade: previous grade.
        reason: failure reason; either a list (entries may be strings or
            other objects such as dicts) or any other value.
        time: event time string.
        flush_alarm: when true, pending alarms are flushed after queueing.
    """
    global history_log

    try:
        history_log.write_log('[%s][%s][%s->%s] %s', node_name, item,
                              pre_grade, grade, reason)

        sql = 'SELECT * FROM ' + DB.REGI_SYS_TBL

        conn = None
        try:
            with DB.connection() as conn:
                url_list = conn.cursor().execute(sql).fetchall()
        finally:
            # Close the connection even when the query raises.
            if conn is not None:
                conn.close()

        for url, auth in url_list:
            header = {
                'Content-Type': 'application/json',
                'Authorization': str(auth)
            }
            req_body = {
                'system': node_name,
                'item': item,
                'grade': grade,
                'pre_grade': pre_grade,
                'reason': reason,
                'time': time
            }
            req_body_json = json.dumps(req_body)

            try:
                requests.post(str(url),
                              headers=header,
                              data=req_body_json,
                              timeout=2)
            except Exception:
                # Best effort: a receiver that does not respond must not
                # prevent the remaining pushes or the alarm below.
                pass

        reason_str = ''
        if isinstance(reason, list):
            if reason:
                try:
                    joined = '\n-- '.join(reason)
                except TypeError:
                    # Some entries are not strings (e.g. dicts describing
                    # the failed item); stringify them instead of losing
                    # the alarm.
                    joined = '\n-- '.join(str(entry) for entry in reason)
                reason_str = '-- ' + joined
        else:
            reason_str = str(reason)

        ALARM.queue_alarm(node_name + ' ' + item + ' ' + grade.upper(),
                          reason_str, time)
        if flush_alarm:
            ALARM.flush_pending_alarm()

    except:
        LOG.exception()
Esempio n. 10
0
def occur_event(conn, node_name, item, pre_value, cur_value):
    """Store a value change in the event table and push it to subscribers.

    Args:
        conn: DB connection handed to ``DB.sql_execute``.
        node_name: node the event belongs to.
        item: monitored item name.
        pre_value: previous value (string).
        cur_value: new value (string); stored as the grade.
    """
    stamp = str(datetime.now())
    desc = pre_value + ' -> ' + cur_value

    sql = "UPDATE " + DB.EVENT_TBL + \
          " SET grade = '" + cur_value + "'," + \
          " desc = '" + desc + "'," + \
          " time = '" + stamp + "'" + \
          " WHERE nodename = '" + node_name + "' and item = '" + item + "'"
    LOG.info('Update alarm info = ' + sql)

    # A failed update is logged only; the event is pushed regardless.
    update_result = DB.sql_execute(sql, conn)
    if update_result != 'SUCCESS':
        LOG.error('DB Update Fail.')

    push_event(node_name, item, cur_value, desc, stamp)
Esempio n. 11
0
def get_node_list(nodes, param, tbl=DB.NODE_INFO_TBL):
    """Select the given columns for one node or for all nodes.

    Args:
        nodes: node name to filter on, or ``'all'`` for no filter.
        param: column list placed after ``SELECT``.
        tbl: table to read from.

    Returns:
        The list of fetched rows, or ``None`` when any error is raised.
    """
    try:
        query = "SELECT " + param + " FROM " + tbl
        if nodes != 'all':
            query += " WHERE nodename = '" + nodes + "'"

        with DB.connection() as conn:
            rows = conn.cursor().execute(query).fetchall()
        conn.close()

        return rows
    except:
        LOG.exception()
        return None
Esempio n. 12
0
def proc_dis_ha(dummy, param):
    """Return the stored HA statistics as a parsed JSON object.

    Reads the ``stats`` column of the row with ``ha_key = 'HA'`` from
    ``DB.HA_TBL``, replaces single quotes by double quotes and parses the
    result as JSON.

    Args:
        dummy: unused.
        param: unused.

    Returns:
        The parsed statistics, ``{'HA': 'FAIL'}`` when no row exists, or
        ``{'Result': 'FAIL'}`` when any error is raised.
    """
    try:
        sql = 'SELECT stats FROM ' + DB.HA_TBL + ' WHERE ha_key = \'HA\''

        conn = None
        try:
            with DB.connection() as conn:
                row = conn.cursor().execute(sql).fetchone()
        finally:
            # Close the connection even when the query raises.
            if conn is not None:
                conn.close()

        # fetchone() gives None when no row matches; report that explicitly
        # instead of failing while iterating over None.
        if not row:
            return {'HA': 'FAIL'}

        # The column holds a quoted-with-apostrophes dump; convert the quotes
        # so that it can be parsed as JSON.
        return json.loads(str(row[0]).replace('\'', '\"'))
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
Esempio n. 13
0
    def run(self):
        """Initialise the DB, start the REST server and monitor periodically.

        When the configured watchdog interval is 0 no monitoring is done and
        the method only sleeps forever. Otherwise ``watchdog.periodic`` is
        called in an endless loop; on any error a 'disconnect' event is
        pushed, the connection is closed and the process exits with code 1.
        """
        DB.db_initiation()

        # The REST server is mandatory: give up when it cannot be started.
        try:
            REST_SVR.rest_server_start()
        except:
            print('Rest Server failed to start')
            LOG.exception()
            sys.exit(1)

        if CONF.watchdog()['interval'] == 0:
            LOG.info("--- Not running periodic monitoring ---")
            while True:
                time.sleep(3600)

        LOG.info("--- Periodic Monitoring Start ---")

        conn = DB.connection()

        while True:
            try:
                watchdog.periodic(conn)

                # The interval is re-read from the configuration every cycle.
                time.sleep(CONF.watchdog()['interval'])
            except:
                watchdog.push_event('sonawatcher', 'disconnect',
                                    'critical',
                                    'sonawatcher server shutdown',
                                    str(datetime.now()))
                conn.close()
                LOG.exception()
                sys.exit(1)
Esempio n. 14
0
def occur_event(conn, db_log, node_name, item, pre_grade, cur_grade, reason):
    """Store a grade change in the event table and push it to subscribers.

    Args:
        conn: DB connection handed to ``DB.sql_execute``.
        db_log: logger exposing ``write_log`` for SQL traces.
        node_name: node the event belongs to.
        item: monitored item name.
        pre_grade: previous grade (string).
        cur_grade: new grade (string).
        reason: failure reason; stored as ``str(reason)``.
    """
    try:
        stamp = str(datetime.now())

        sql = "UPDATE " + DB.EVENT_TBL + \
              " SET grade = '" + cur_grade + "'," + \
              " pre_grade = '" + pre_grade + "'," + \
              ' reason = "' + str(reason) + '",' + \
              " time = '" + stamp + "'" + \
              " WHERE nodename = '" + node_name + "' and item = '" + item + "'"
        db_log.write_log('----- UPDATE EVENT INFO -----\n' + sql)

        # A failed update is only traced; the event is pushed regardless.
        update_result = DB.sql_execute(sql, conn)
        if update_result != 'SUCCESS':
            db_log.write_log('[FAIL] EVENT INFO DB Update Fail.')

        push_event(node_name, item, cur_grade, pre_grade, reason, stamp, False)
    except:
        LOG.exception()
Esempio n. 15
0
def push_event(node_name, item, grade, pre_grade, reason, time):
    """Record a grade change and POST it as JSON to every registered URL.

    The change is written to ``history_log`` first; then each (url, auth)
    pair read from ``DB.REGI_SYS_TBL`` receives the event with the stored
    auth value as ``Authorization`` header. POST errors are ignored.

    Args:
        node_name: reported as ``system``.
        item: monitored item name.
        grade: new grade.
        pre_grade: previous grade.
        reason: failure reason, sent as-is in the JSON body.
        time: event time string.
    """
    global history_log

    try:
        history_log.write_log('[%s][%s][%s][%s] %s', node_name, item, grade,
                              pre_grade, reason)

        query = "SELECT * FROM " + DB.REGI_SYS_TBL

        with DB.connection() as conn:
            subscribers = conn.cursor().execute(query).fetchall()
        conn.close()

        for url, auth in subscribers:
            headers = {
                'Content-Type': 'application/json',
                'Authorization': str(auth)
            }
            payload = json.dumps({
                'system': node_name,
                'item': item,
                'grade': grade,
                'pre_grade': pre_grade,
                'reason': reason,
                'time': time
            })

            try:
                requests.post(str(url), headers=headers, data=payload,
                              timeout=2)
            except:
                # The receiver did not respond; carry on with the next one.
                pass
    except:
        LOG.exception()
Esempio n. 16
0
def check_resource(conn, db_log, node_name, user_name, node_ip):
    """Measure CPU, memory and disk usage of a node and store the values.

    Args:
        conn: DB connection handed to ``DB.sql_execute``.
        db_log: logger exposing ``write_log`` for SQL traces.
        node_name: node whose row in ``DB.RESOURCE_TBL`` is updated.
        user_name: passed to the usage helpers.
        node_ip: passed to the usage helpers.

    Returns:
        ``(cpu, mem, disk)`` as strings, or ``(-1, -1, -1)`` when reading the
        usage raises. A failing DB update does not affect the result.
    """
    try:
        probes = (get_cpu_usage, get_mem_usage, get_disk_usage)
        cpu, mem, disk = [str(probe(user_name, node_ip, True))
                          for probe in probes]

        try:
            sql = "UPDATE " + DB.RESOURCE_TBL + \
                  " SET cpu = '" + cpu + "'," + \
                  " memory = '" + mem + "'," + \
                  " disk = '" + disk + "'" + \
                  " WHERE nodename = '" + node_name + "'"
            db_log.write_log('----- UPDATE RESOURCE INFO -----\n' + sql)

            update_result = DB.sql_execute(sql, conn)
            if update_result != 'SUCCESS':
                db_log.write_log('[FAIL] RESOURCE DB Update Fail.')
        except:
            LOG.exception()

        return cpu, mem, disk
    except:
        LOG.exception()
        return -1, -1, -1
Esempio n. 17
0
def swarm_check(conn, db_log, node_name, user_name, node_ip):
    """Check docker swarm nodes and services over SSH and store the output.

    Runs ``docker node ls``, ``docker service ls`` and, per configured app,
    ``docker service ps`` on the node, derives a node and an app verdict and
    writes the collected command output to ``DB.SWARM_TBL``.

    Args:
        conn: DB connection handed to ``DB.sql_execute``.
        db_log: logger exposing ``write_log`` for SQL traces.
        node_name: node whose row in the swarm table is updated.
        user_name: SSH user passed to ``SshCommand.ssh_exec``.
        node_ip: SSH target passed to ``SshCommand.ssh_exec``.

    Returns:
        ``(ret_app, ret_node)``, each 'ok' or 'nok'.
    """
    str_node = ''
    str_service = ''
    str_ps = ''

    ret_app = 'ok'
    ret_node = 'ok'

    node_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker node ls')

    if node_rt is not None:
        try:
            leader_flag = False
            for line in node_rt.splitlines():
                line = line.decode('utf-8')
                # Lines are collected until the first failing one; the
                # breaks below stop the accumulation as well.
                str_node = str_node + line + '\n'

                # Skip the header line.
                if line.startswith('ID'):
                    continue

                if 'Leader' in line:
                    leader_flag = True

                    # The leader must be both Ready and Active.
                    if not ('Ready' in line and 'Active' in line):
                        ret_node = 'nok'
                        break

                if 'Down' in line:
                    ret_node = 'nok'
                    break

            # No leader line seen (also the case when the loop stopped early
            # on a 'Down' line before reaching the leader).
            if not leader_flag:
                ret_node = 'nok'
        except:
            LOG.exception()
            ret_node = 'nok'

    else:
        # NOTE(review): ret_node is left at 'ok' on this path, unlike the
        # service branch below which sets ret_app to 'nok' -- confirm this
        # is intended.
        LOG.error("\'%s\' Swarm Node Check Error", node_ip)
        str_node = 'fail'

    service_rt = SshCommand.ssh_exec(user_name, node_ip,
                                     'sudo docker service ls')

    if service_rt is not None:
        try:
            # Every configured app must be listed with all replicas running.
            for app in CONF.swarm()['app_list']:
                find_flag = False
                for line in service_rt.splitlines():
                    line = line.decode('utf-8')

                    # Skip the header line.
                    if line.startswith('ID'):
                        continue

                    # Exactly five whitespace-separated columns are expected;
                    # any other shape raises and is handled below.
                    id, name, mode, rep, img = line.split()

                    if app == name:
                        find_flag = True
                        # The replicas column is compared as
                        # "<left>/<right>"; both sides must be equal.
                        rep_tmp = rep.split('/')

                        if not (rep_tmp[0] == rep_tmp[1]):
                            ret_app = 'nok'
                            break

                if not find_flag:
                    ret_app = 'nok'
                    break
        except:
            LOG.exception()
            ret_app = 'nok'

        # Keep the complete service listing for the DB, whatever the verdict.
        for line in service_rt.splitlines():
            line = line.decode('utf-8')
            str_service = str_service + line + '\n'
    else:
        LOG.error("\'%s\' Swarm Service Check Error", node_ip)
        str_service = 'fail'
        ret_app = 'nok'

    try:
        # Collect the task listing of every configured app; this does not
        # influence the returned verdicts.
        for app in CONF.swarm()['app_list']:
            ps_rt = SshCommand.ssh_exec(user_name, node_ip,
                                        'sudo docker service ps ' + app)

            str_ps = str_ps + ' * ' + app + '\n\n'

            if ps_rt is not None:
                for line in ps_rt.splitlines():
                    line = line.decode('utf-8')
                    str_ps = str_ps + line + '\n'
            else:
                LOG.error("\'%s\' Swarm PS Check Error", node_ip)
                str_ps = str_ps + 'Command failure(' + app + ')\n'

            str_ps = str_ps + '\n'
    except:
        LOG.exception()

    try:
        # NOTE: the collected output is embedded between single quotes
        # without any escaping.
        sql = 'UPDATE ' + DB.SWARM_TBL + \
              ' SET node = \'' + str_node + '\',' + \
              ' service = \'' + str_service + '\',' + \
              ' ps = \'' + str_ps + '\'' + \
              ' WHERE nodename = \'' + node_name + '\''
        db_log.write_log('----- UPDATE SWARM INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] SWARN DB Update Fail.')
    except:
        LOG.exception()

    return ret_app, ret_node
Esempio n. 18
0
def swarm_node_check(conn, db_log, node_name, username, node_ip,
                     swarm_manager):
    """Check the swarm nodes via the swarm manager and store the node list.

    ``docker node ls`` is run on ``swarm_manager`` through an SSH hop on
    ``node_ip``. Each listed node becomes a dict; a leader that is not
    Ready/Active, a node that is Down or a missing leader makes the status
    'nok'. The node list is written to ``DB.SWARM_TBL``.

    Args:
        conn: DB connection handed to ``DB.sql_execute``.
        db_log: logger exposing ``write_log`` for SQL traces.
        node_name: node whose row in the swarm table is updated.
        username: SSH user passed to ``SshCommand.ssh_exec``.
        node_ip: SSH target passed to ``SshCommand.ssh_exec``.
        swarm_manager: host the docker command is forwarded to.

    Returns:
        ``(node_status, fail_reason)`` where node_status is 'ok', 'nok' or
        'fail' and fail_reason is a list of message strings.
    """
    node_status = 'ok'
    node_list = []
    fail_reason = []

    try:
        remote_cmd = 'ssh root@' + swarm_manager + ' "sudo docker node ls"'
        node_rt = SshCommand.ssh_exec(username, node_ip, remote_cmd)

        if node_rt is None:
            LOG.error("'%s' Swarm Node Check Error", node_ip)
            node_status = 'fail'
        else:
            try:
                leader_seen = False
                for raw in node_rt.splitlines():
                    # Drop the '*' marker and collapse runs of whitespace so
                    # the columns can be split on single blanks.
                    line = " ".join(raw.decode('utf-8').replace('*', '').split())
                    fields = line.split(' ')

                    # Skip the header line.
                    if line.startswith('ID'):
                        continue

                    is_leader = 'Leader' in line
                    entry = {
                        'hostname': fields[1],
                        'status': fields[2],
                        'availability': fields[3],
                        'manager': fields[4] if is_leader else ''
                    }

                    if is_leader:
                        leader_seen = True

                        if 'Ready' not in line or 'Active' not in line:
                            node_status = 'nok'
                            fail_reason.append(fields[1] + ' node is not ready.')

                    if 'Down' in line:
                        node_status = 'nok'
                        fail_reason.append(fields[1] + ' node is down.')

                    node_list.append(entry)

                if not leader_seen:
                    node_status = 'nok'
                    fail_reason.append('swarm leader node does not exist.')
            except:
                LOG.exception()
                node_status = 'nok'

        try:
            sql = "UPDATE " + DB.SWARM_TBL + \
                  ' SET node = "' + str(node_list) + '"' + \
                  " WHERE nodename = '" + node_name + "'"
            db_log.write_log('----- UPDATE SWARM NODE INFO -----\n' + sql)

            update_result = DB.sql_execute(sql, conn)
            if update_result != 'SUCCESS':
                db_log.write_log('[FAIL] SWARM NODE DB Update Fail.')
        except:
            LOG.exception()

    except:
        LOG.exception()
        node_status = 'fail'

    return node_status, fail_reason
Esempio n. 19
0
def swarm_service_check(conn, db_log, node_name, username, node_ip,
                        swarm_manager):
    """Check the swarm services via the swarm manager and store the result.

    ``docker service ls`` is run on ``swarm_manager`` through an SSH hop on
    ``node_ip``. Services returned by ``get_service_list`` are monitored: a
    missing one, or one whose replica counts differ, makes the status 'nok'.
    Other listed services are recorded without affecting the status. The
    tasks of every monitored service (``docker service ps``) are collected
    too, and both lists are written to ``DB.SWARM_TBL``.

    Args:
        conn: DB connection handed to ``DB.sql_execute``.
        db_log: logger exposing ``write_log`` for SQL traces.
        node_name: node whose row in the swarm table is updated.
        username: SSH user passed to ``SshCommand.ssh_exec``.
        node_ip: SSH target passed to ``SshCommand.ssh_exec``.
        swarm_manager: host the docker commands are forwarded to.

    Returns:
        ``(service_status, fail_reason)`` where service_status is 'ok',
        'nok' or 'fail' and fail_reason mixes service dicts and message
        strings.
    """
    service_status = 'ok'
    service_list = []
    ps_list = []
    fail_reason = []

    def build_entry(name, mode, rep, img, monitored):
        # The replicas column is "<left>/<right>"; equal sides mean healthy.
        counts = rep.split('/')
        healthy = counts[0] == counts[1]
        entry = {
            'name': name,
            'mode': mode,
            'replicas': rep,
            'image': img,
            'status': 'ok' if healthy else 'nok',
            'monitor_item': monitored
        }
        return healthy, entry

    try:
        ssh_prefix = 'ssh root@' + swarm_manager
        service_rt = SshCommand.ssh_exec(
            username, node_ip, ssh_prefix + ' "sudo docker service ls"')

        instance_list = get_service_list()

        if service_rt is None:
            LOG.error("'%s' Swarm Service Check Error", node_ip)
            service_status = 'fail'
        else:
            try:
                # Pass 1: every monitored service must be listed and healthy.
                for svc in instance_list:
                    found = False
                    for raw in service_rt.splitlines():
                        line = raw.decode('utf-8')

                        if line.startswith('ID'):
                            continue

                        _id, name, mode, rep, img = line.split()

                        if svc != name:
                            continue

                        found = True
                        healthy, entry = build_entry(name, mode, rep, img, True)

                        if not healthy:
                            service_status = 'nok'
                            fail_reason.append(entry)

                        service_list.append(entry)

                    if not found:
                        service_status = 'nok'
                        fail_reason.append('swarm ' + svc +
                                           ' service does not exist.')
                        break

                # Pass 2: record the services that are not monitored.
                for raw in service_rt.splitlines():
                    line = raw.decode('utf-8')

                    if line.startswith('ID'):
                        continue

                    _id, name, mode, rep, img = line.split()

                    if name in instance_list:
                        continue

                    _healthy, entry = build_entry(name, mode, rep, img, False)
                    service_list.append(entry)

            except:
                LOG.exception()
                service_status = 'fail'

        # Task listing of every monitored service.
        for app in instance_list:
            ps_cmd = ssh_prefix + ' "sudo docker service ps ' + app + '"'
            ps_rt = SshCommand.ssh_exec(username, node_ip, ps_cmd)

            if ps_rt is None:
                LOG.error("'%s' Swarm PS Check Error", node_ip)
                continue

            for raw in ps_rt.splitlines():
                line = raw.decode('utf-8')

                if line.startswith('ID'):
                    continue

                # Remove the ' \_ ' marker, then split on whitespace.
                fields = line.replace(' \\_ ', '').split()

                ps_list.append({
                    'name': fields[1],
                    'image': fields[2],
                    'node': fields[3],
                    'desired_state': fields[4],
                    'current_state': fields[5]
                })

        try:
            sql = "UPDATE " + DB.SWARM_TBL + \
                  ' SET service = "' + str(service_list) + '",' + \
                  ' ps = "' + str(ps_list) + '"' + \
                  " WHERE nodename = '" + node_name + "'"
            db_log.write_log('----- UPDATE SWARM SERVICE/PS INFO -----\n' +
                             sql)

            update_result = DB.sql_execute(sql, conn)
            if update_result != 'SUCCESS':
                db_log.write_log('[FAIL] SWARM SERVICE/PS DB Update Fail.')
        except:
            LOG.exception()

    except:
        LOG.exception()
        service_status = 'fail'

    return service_status, fail_reason
Esempio n. 20
0
def onos_app_check(conn, db_log, node_name, node_ip):
    """Check the active ONOS applications of a node and store the result.

    Runs ``apps -a -s`` on the ONOS node. Every application listed under
    ``app_list`` in the ONOS configuration must be active, otherwise the
    status is 'nok'. Other active applications are recorded as not
    monitored. When 'cpman' is not active it is activated. The application
    list is written to ``DB.ONOS_TBL``.

    Args:
        conn: DB connection handed to ``DB.sql_execute``.
        db_log: logger exposing ``write_log`` for SQL traces.
        node_name: node whose row in the ONOS table is updated.
        node_ip: address passed to ``SshCommand.onos_ssh_exec``.

    Returns:
        ``(status, fail_reason)`` where status is 'ok', 'nok' or 'fail' and
        fail_reason lists the entries of the missing applications.
    """
    # Bound before the try block so that the final return is always valid,
    # even when the very first call below raises.
    status = 'ok'
    app_list = []
    fail_reason = []

    try:
        app_rt = SshCommand.onos_ssh_exec(node_ip, 'apps -a -s')

        app_active_list = []

        if app_rt is not None:
            for line in app_rt.splitlines():
                # Third dot-separated component of the line, up to the next
                # whitespace, is taken as the short application name.
                app_active_list.append(line.split(".")[2].split()[0])

            if 'cpman' not in app_active_list:
                # activate cpman
                LOG.info('Cpman does not exist. Activate cpman')
                SshCommand.onos_ssh_exec(node_ip,
                                         'app activate org.onosproject.cpman')

            for app in CONF.onos()['app_list']:
                if app in app_active_list:
                    app_json = {
                        'name': app,
                        'status': 'ok',
                        'monitor_item': True
                    }
                    # Whatever remains afterwards is an unmonitored app.
                    app_active_list.remove(app)
                else:
                    status = 'nok'
                    app_json = {
                        'name': app,
                        'status': 'nok',
                        'monitor_item': True
                    }
                    fail_reason.append(app_json)
                app_list.append(app_json)

            for app in app_active_list:
                app_list.append(
                    {'name': app, 'status': 'ok', 'monitor_item': False})
        else:
            LOG.error("\'%s\' ONOS Application Check Error", node_ip)
            status = 'fail'
            app_list = 'fail'

        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET applist = \"' + str(app_list) + '\"' +\
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE ONOS APP INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS APP DB Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        status = 'fail'

    return status, fail_reason
Esempio n. 21
0
def periodic(conn, pre_stat, db_log):
    """Run one monitoring cycle over every node of the node-info table.

    Each node is pinged; ONOS nodes that answer are additionally checked
    through ``chk_onos.onos_check``. Every result is passed, together with
    the grade currently stored in the event table, to
    ``alarm_event.process_event`` and the value it returns is written to
    ``DB.STATUS_TBL``. When alarms were pending before the cycle and none
    remain, a site-wide 'ok' event is raised. Pending alarm messages are
    flushed at the end.

    Args:
        conn: DB connection used for all queries and updates.
        pre_stat: returned unchanged.
        db_log: logger exposing ``write_log`` for SQL traces.

    Returns:
        ``pre_stat``; ``None`` when the node list is empty or cannot be read.
    """
    try:
        cur_info = {}
        #LOG.info('Periodic checking %s', str(CONF.watchdog()['check_system']))

        try:
            node_list = cmd_proc.get_node_list('all', 'nodename, ip_addr, username, type, sub_type')
            if not node_list:
                LOG.info("Not Exist Node data ...")
                return
        except:
            LOG.exception()
            return

        # Read cur alarm status
        sql = 'SELECT nodename, item, grade FROM ' + DB.EVENT_TBL

        db_log.write_log(sql)
        cur_grade = conn.cursor().execute(sql).fetchall()

        old_nok_count = 0
        for nodename, item, grade in cur_grade:
            cur_info.setdefault(nodename, {})[item] = grade
            if grade != 'ok':
                old_nok_count += 1

        new_nok_count = 0
        for node_name, node_ip, user_name, node_type, sub_type in node_list:
            #LOG.info('------------------------------------ ' + node_name + ' START ------------------------------------')

            # Reported as 'fail' unless the ONOS checks below run.
            onos_cluster = 'fail'
            onos_device = 'fail'
            onos_link = 'fail'
            onos_app = 'fail'

            # ping check
            ping = net_check(node_ip)
            ping_reason = []
            if ping != 'ok':
                # The reason must go into the list that is handed to
                # process_event below.
                ping_reason.append('ping check failed on ' + node_ip)
                new_nok_count += 1
            ping = alarm_event.process_event(conn, db_log, node_name, node_type, 'PING',
                                             cur_info[node_name]['PING'], ping, ping_reason)

            if ping == 'ok':
                if node_type.upper() == 'ONOS':
                    # check connection
                    onos_cluster, onos_device, onos_link, onos_app, cluster_reason, device_reason, link_reason, app_reason = chk_onos.onos_check(conn, db_log, node_name, node_ip)
                    onos_cluster = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_CLUSTER',
                                                             cur_info[node_name]['ONOS_CLUSTER'], onos_cluster, cluster_reason)
                    onos_device = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_DEVICE',
                                                            cur_info[node_name]['ONOS_DEVICE'], onos_device, device_reason)
                    onos_link = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_LINK',
                                                          cur_info[node_name]['ONOS_LINK'], onos_link, link_reason)
                    onos_app = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_APP',
                                                         cur_info[node_name]['ONOS_APP'], onos_app, app_reason)
                    if onos_cluster != 'ok':
                        new_nok_count += 1
                    if onos_device != 'ok':
                        new_nok_count += 1
                    if onos_link != 'ok':
                        new_nok_count += 1
                    if onos_app != 'ok':
                        new_nok_count += 1

            try:
                sql = 'UPDATE ' + DB.STATUS_TBL + \
                      ' SET' + \
                      ' PING = \'' + ping + '\',' + \
                      ' ONOS_CLUSTER = \'' + onos_cluster + '\',' + \
                      ' ONOS_DEVICE = \'' + onos_device + '\',' + \
                      ' ONOS_LINK = \'' + onos_link + '\',' + \
                      ' ONOS_APP = \'' + onos_app + '\',' + \
                      ' time = \'' + str(datetime.now()) + '\'' + \
                      ' WHERE nodename = \'' + node_name + '\''
                db_log.write_log('----- UPDATE TOTAL SYSTEM INFO -----\n' + sql)

                if DB.sql_execute(sql, conn) != 'SUCCESS':
                    db_log.write_log('[FAIL] TOTAL SYSTEM INFO DB Update Fail.')
            except:
                LOG.exception()

            # Log the per-node detail only when something was already not ok
            # before this cycle.
            if old_nok_count > 0:
                LOG.info('chk_onos[%s]: ping=%s cluster=%s device=%s link=%s app=%s' %
                         (node_name, ping, onos_cluster, onos_device, onos_link, onos_app))

        # Everything recovered during this cycle: raise the site-wide event.
        if old_nok_count > 0 and new_nok_count == 0:
            alarm_event.process_event(conn, db_log, 'ALL', 'SITE', 'STATUS', 'none', 'ok', [])

        # send all alarm messages pending
        alarm_event.flush_event_alarm()

    except:
        LOG.exception()

    return pre_stat
Esempio n. 22
0
def vrouter_check(conn, db_log, node_name, user_name, node_ip):
    """Check the docker containers, ONOS apps and routes of a vrouter node.

    ``sudo docker ps`` is run on the node over SSH and every container named
    in ``CONF.openstack()['docker_list']`` is looked up in its output.  The
    id of the container whose second column contains 'onos' is then used to
    find the ONOS container address, from which the active applications and
    the routing table are read.  The three collected lists are written to
    ``DB.OPENSTACK_TBL`` for ``node_name``.

    Returns a tuple ``(status, fail_list)``: status is 'ok', 'nok' or
    'fail', and fail_list holds every container/app entry marked 'nok'.
    """
    overall = 'ok'
    containers = []
    fail_list = []
    onos_id = ''

    ps_output = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker ps')

    if ps_output is None:
        LOG.error("\'%s\' Vrouter Node Check Error", node_ip)
        overall = 'fail'
    else:
        try:
            for wanted in CONF.openstack()['docker_list']:
                for row in ps_output.splitlines():
                    # skip the column header of `docker ps`
                    if row.startswith('CONTAINER'):
                        continue

                    columns = row.split()

                    if ' ' + wanted in row:
                        running = 'Up' in row
                        entry = {'name': wanted,
                                 'status': 'ok' if running else 'nok',
                                 'type': 'docker'}
                        if not running:
                            fail_list.append(entry)
                            overall = 'nok'

                        containers.append(entry)

                    # remember the container id of the ONOS image
                    if 'onos' in columns[1]:
                        onos_id = columns[0]
        except:
            LOG.exception()

    onos_app_list = []
    route_list = []

    if onos_id == '':
        LOG.info('can not find onos_id.')
        overall = 'fail'
    else:
        try:
            # get onos container ip
            inspect_out = SshCommand.ssh_exec(
                user_name, node_ip,
                'sudo docker inspect ' + onos_id + ' | grep IPAddress')

            if inspect_out is not None:
                for raw in inspect_out.splitlines():
                    entry = raw.strip()
                    if not entry.startswith('\"IPAddress'):
                        continue
                    onos_ip = entry.split(':')[1].strip().replace('\"', '').replace(',', '')
                    break

                apps_out = SshCommand.ssh_pexpect(user_name, node_ip, onos_ip, 'apps -a -s')

                # third dot-separated component of each line, up to the first blank
                active_apps = [row.split(".")[2].split()[0]
                               for row in apps_out.splitlines()
                               if not row.startswith('fail')]

                for app in CONF.openstack()['onos_vrouter_app_list']:
                    app_ok = app in active_apps
                    app_json = {'name': app,
                                'status': 'ok' if app_ok else 'nok',
                                'type': 'onos_app'}
                    if not app_ok:
                        fail_list.append(app_json)
                        overall = 'nok'

                    onos_app_list.append(app_json)

                routes_out = SshCommand.ssh_pexpect(user_name, node_ip, onos_ip, 'routes')

                for raw in routes_out.splitlines():
                    entry = raw.strip()

                    # table captions and the summary line carry no route
                    if entry.startswith(('Table', 'Network', 'Total')):
                        continue

                    squeezed = " ".join(entry.split())

                    if squeezed.startswith('fail'):
                        continue

                    fields = squeezed.split(' ')
                    route_list.append({'network': fields[0], 'next_hop': fields[1]})
        except:
            LOG.exception()

    try:
        sql = ('UPDATE ' + DB.OPENSTACK_TBL +
               ' SET docker = \"' + str(containers) + '\",' +
               ' onosApp = \"' + str(onos_app_list) + '\",' +
               ' routingTable = \"' + str(route_list) + '\"' +
               ' WHERE nodename = \'' + node_name + '\'')
        db_log.write_log('----- UPDATE GATEWAY INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] GATEWAY DB Update Fail.')
    except:
        LOG.exception()

    return overall, fail_list
Esempio n. 23
0
    def run(self):
        """Run the watcher main loop.

        Sets up the DB log, initialises the database and starts the REST
        server.  When ``CONF.watchdog()['interval']`` is 0 the method then
        only sleeps forever; otherwise it loops: the local REST server is
        probed with ``curl`` (up to three attempts), ``watchdog.periodic``
        is called with the shared DB connection, and the loop sleeps for
        the configured interval.
        """
        db_log = USER_LOG()
        db_log.set_log('db.log',
                       CONF.base()['log_rotate_time'],
                       CONF.base()['log_backup_count'])

        # state handed to, and returned by, every watchdog.periodic() call
        pre_stat = dict()

        # DB initiation
        DB.db_initiation(db_log)

        # Start RESTful server
        try:
            REST_SVR.rest_server_start()
        except:
            print 'Rest Server failed to start'
            LOG.exception()
            # NOTE(review): execution continues below unless self.exit()
            # terminates the process -- confirm.
            self.exit()

        # Periodic monitoring
        if CONF.watchdog()['interval'] == 0:
            LOG.info("--- Not running periodic monitoring ---")
            # interval 0: no monitoring, block here in one-hour sleeps
            while True:
                time.sleep(3600)
        else:
            LOG.info("--- Periodic Monitoring Start ---")
            # history_log is not defined in this method; presumably a
            # module-level logger -- verify.
            history_log.write_log("--- Event History Start ---")

            # one connection is shared by all monitoring iterations
            conn = DB.connection()

            exitFlag = False
            while True:
                try:
                    # probe the local REST server, at most three attempts
                    i = 0
                    while i < 3:
                        i = i + 1
                        # check rest server
                        try:
                            url = 'http://' + socket.gethostbyname(
                                socket.gethostname()) + ':' + str(CONF.rest(
                                )['rest_server_port']) + '/alive-check'

                            cmd = 'curl -X GET \"' + url + '\"'
                            LOG.info('cmd = ' + cmd)
                            result = Popen(cmd,
                                           stdout=PIPE,
                                           stderr=PIPE,
                                           shell=True)
                            output, error = result.communicate()

                            # a non-zero curl exit status counts as a failed probe
                            if result.returncode != 0:
                                LOG.info('REST SERVER CHECK FAIL [' + str(i) +
                                         ']')

                                # third consecutive failure: raise the alarm,
                                # release the connection and leave both loops
                                if i == 3:
                                    LOG.info('fail to check rest server.')
                                    alarm_event.push_event(
                                        'sonawatcher',
                                        'SONAWATCHER_DISCONNECT', 'critical',
                                        'normal',
                                        'sonawatcher server shutdown',
                                        str(datetime.now()))
                                    conn.close()
                                    exitFlag = True
                                    self.exit()
                                    break
                            else:
                                # probe succeeded, no further attempts needed
                                break

                        except:
                            # an exception during a probe is only logged; the
                            # attempt still counts because i was incremented
                            # at the top of the loop, and three such attempts
                            # fall through to the monitoring call below
                            LOG.exception()

                    if exitFlag:
                        break

                    pre_stat = watchdog.periodic(conn, pre_stat, db_log)

                    time.sleep(CONF.watchdog()['interval'])
                except:
                    alarm_event.push_event('sonawatcher',
                                           'SONAWATCHER_DISCONNECT',
                                           'critical', 'normal',
                                           'sonawatcher server shutdown',
                                           str(datetime.now()))
                    # NOTE(review): conn is closed here but the enclosing
                    # `while True` keeps looping, so the next iteration hands
                    # the closed connection to watchdog.periodic() -- confirm
                    # whether a break or a reconnect is intended.
                    conn.close()
                    LOG.exception()
Esempio n. 24
0
def get_node_traffic(conn, db_log, node_name, rx_dic, tx_dic, total_rx, total_tx, err_info, pre_stat):
    """Evaluate the VXLAN traffic of one node against its previous sample.

    The rx/tx totals are compared with the values stored under
    ``pre_stat[node_name + '_VXLAN']`` by the previous call, and the
    resulting rx/tx ratio is checked against
    ``CONF.alarm()['node_traffic_ratio']``.  The minimum expected rx count is
    read from the ONOS flows whose ``tunnelDst`` is the node's data IP.  The
    report is written to the ``vxlan_traffic`` column of
    ``DB.OPENSTACK_TBL``.

    Args:
        conn: DB connection used for the lookups and the update.
        db_log: logger that receives the generated UPDATE statement.
        node_name: name of the node being evaluated.
        rx_dic, tx_dic: mappings indexed by node name; only the entry of
            ``node_name`` is copied into the report.
        total_rx, total_tx: totals for this poll; the totals of the previous
            poll are subtracted from them.
        err_info: mapping with 'rx_drop', 'rx_err', 'tx_drop' and 'tx_err'.
        pre_stat: mapping holding the previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason_list)``.  status is 'ok', 'nok', 'fail'
        or '-' (no previous sample yet); reason_list contains the report
        whenever status is not 'ok'.
    """
    status = 'ok'
    reason_list = []

    # Defaults so that the report below can always be built, even when the
    # check aborts before the real values are known.
    ratio = -1
    description = ''
    port_json = {'rx': -1, 'minimum_rx': -1, 'rx_drop': -1, 'rx_errs': -1,
                 'tx': -1, 'tx_drop': -1, 'tx_errs': -1}

    try:
        stat_key = node_name + '_VXLAN'

        # keep the raw totals: they become the baseline of the next poll
        pre_total_rx = total_rx
        pre_total_tx = total_tx

        # check minimum packet count
        sql = 'SELECT data_ip FROM ' + DB.OPENSTACK_TBL + ' WHERE nodename = \'' + node_name + '\''
        data_ip = conn.cursor().execute(sql).fetchone()[0]

        sql = 'SELECT ip_addr FROM ' + DB.NODE_INFO_TBL + ' WHERE type = \'ONOS\''
        nodes_info = conn.cursor().execute(sql).fetchall()

        min_rx = 0
        if not nodes_info:
            LOG.info('Fail to load onos list')
            status = 'fail'
        else:
            for ip in nodes_info:
                flows_rt = SshCommand.onos_ssh_exec(ip[0], '\"flows --filter \'{tunnelDst=' + data_ip + '}\' --short\"')

                if flows_rt is not None:
                    for line in flows_rt.splitlines():
                        if 'tunnelDst' in line:
                            min_rx = min_rx + int(line.split(',')[2].split('=')[1])
                    # the first ONOS node that answers is enough
                    break

        if stat_key not in pre_stat:
            # first sample for this node: nothing to compare with
            status = '-'
            ratio = -1
        else:
            prev = pre_stat[stat_key]
            total_rx = total_rx - int(prev['total_rx'])
            total_tx = total_tx - int(prev['total_tx'])
            cur_min = min_rx - int(prev['min_rx'])

            if total_rx == 0 and total_tx == 0:
                ratio = 100
            elif total_tx <= 0 or total_rx < 0:
                # non-positive denominator or negative numerator: the
                # counters went backwards, so no meaningful ratio exists
                LOG.info('Node Traffic Ratio Fail.')
                ratio = 0
            else:
                ratio = float(total_rx) * 100 / total_tx

        LOG.info('Node Traffic Ratio = ' + str(ratio))

        port_json = {'rx': rx_dic[node_name], 'minimum_rx': min_rx, 'rx_drop': err_info['rx_drop'], 'rx_errs': err_info['rx_err'],
                     'tx': tx_dic[node_name], 'tx_drop': err_info['tx_drop'], 'tx_errs': err_info['tx_err']}

        if not status == '-':
            description = 'Ratio of success for all nodes = ' + str(ratio) + ' (' + str(total_rx) + ' / ' + str(total_tx) + ')'

            if ratio < float(CONF.alarm()['node_traffic_ratio']):
                LOG.info('[NODE TRAFFIC] ratio nok')
                status = 'nok'

            if total_rx < cur_min:
                LOG.info('CUR_MIN_RX = ' + str(cur_min) + ', CUR_RX = ' + str(total_rx) + ', Less than rx minimum.')
                status = 'nok'

            # any growth of a drop/error counter since the last poll is nok
            for counter in ('rx_drop', 'rx_err', 'tx_drop', 'tx_err'):
                if err_info[counter] - int(prev[counter]) > 0:
                    LOG.info('[NODE TRAFFIC] ' + counter + ' nok')
                    status = 'nok'

        pre_stat[stat_key] = {
            'total_rx': pre_total_rx,
            'total_tx': pre_total_tx,
            'min_rx': min_rx,
            'rx_drop': err_info['rx_drop'],
            'rx_err': err_info['rx_err'],
            'tx_drop': err_info['tx_drop'],
            'tx_err': err_info['tx_err'],
        }
    except Exception:
        LOG.exception()
        status = 'fail'

    vxlan_json = {'port_stat_vxlan': port_json, 'period': CONF.watchdog()['interval'],
                  'ratio': format(ratio, '.2f'), 'current_rx': total_rx, 'current_tx': total_tx,
                  'description': description, 'threshold': CONF.alarm()['node_traffic_ratio'], 'status': status}

    try:
        sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
              ' SET vxlan_traffic = \"' + str(vxlan_json) + '\"' + \
              ' WHERE nodename = \'' + node_name + '\''
        db_log.write_log('----- UPDATE VXLAN STAT INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] VXLAN STAT DB Update Fail.')
    except Exception:
        LOG.exception()

    if not status == 'ok':
        reason_list.append(vxlan_json)

    return status, pre_stat, reason_list
Esempio n. 25
0
def get_internal_traffic(conn, db_log, node_name, node_ip, user_name, sub_type, rx_count, patch_tx, pre_stat):
    """Evaluate the internal (in vs. out) packet ratio of one node.

    For a 'COMPUTE' node the packet counters are summed from the flows
    dumped by ``ovs-ofctl`` on ``br-int``; for any other ``sub_type`` the
    supplied ``rx_count`` and ``patch_tx`` are used directly.  The counters
    are compared with the sample stored under
    ``pre_stat[node_name + '_internal']`` and the out/in ratio is checked
    against ``CONF.alarm()['internal_traffic_ratio']``.  The report is
    written to the ``internal_traffic`` column of ``DB.OPENSTACK_TBL``.

    Args:
        conn: DB connection used for the update.
        db_log: logger that receives the generated UPDATE statement.
        node_name: name of the node being evaluated.
        node_ip, user_name: SSH target used for the flow dump.
        sub_type: 'COMPUTE' selects the flow-dump path.
        rx_count: VXLAN rx counter of this poll.
        patch_tx: tx counter used for non-compute nodes; -1 marks it as
            unavailable.
        pre_stat: mapping holding the previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason_list)``.  status is 'ok', 'nok', 'fail'
        or '-' (no previous sample yet); reason_list contains the report
        whenever status is not 'ok'.
    """
    status = 'ok'
    reason_list = []
    desc = ''
    port_json = {}
    vxlan_json = None

    def build_report(ratio, current_rx, current_tx):
        # port_json, desc and status are read when the report is built
        return {'port_stat_in_out': port_json, 'period': CONF.watchdog()['interval'],
                'ratio': ratio, 'current_rx': current_rx, 'current_tx': current_tx,
                'description': desc, 'threshold': CONF.alarm()['internal_traffic_ratio'], 'status': status}

    try:
        in_packet = 0
        out_packet = 0

        if sub_type == 'COMPUTE':
            flow_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo ovs-ofctl -O OpenFlow13 dump-flows br-int')

            inport_cnt = 0
            gw_cnt = 0
            output_cnt = 0

            if flow_rt is not None:
                for line in flow_rt.splitlines():
                    tmp = line.split(',')
                    # the fourth comma-separated field carries the counter
                    if 'in_port' in line:
                        inport_cnt = inport_cnt + int(tmp[3].split('=')[1])
                    elif 'output' in line:
                        output_cnt = output_cnt + int(tmp[3].split('=')[1])
                    elif 'actions=group' in line:
                        gw_cnt = gw_cnt + int(tmp[3].split('=')[1])

                in_packet = inport_cnt + rx_count
                out_packet = gw_cnt + output_cnt

                port_json = {'vm_tx': inport_cnt, 'vxlan_rx': rx_count, 'out_gw': gw_cnt, 'output': output_cnt}
            else:
                port_json = {'vm_tx': -1, 'vxlan_rx': -1, 'out_gw': -1, 'output': -1}
                status = 'fail'

        else:
            port_json = {'vxlan_rx': rx_count, 'patch-integ': patch_tx}

            if patch_tx == -1:
                status = 'fail'
            else:
                in_packet = rx_count
                out_packet = patch_tx

        # keep the raw counters: they become the baseline of the next poll
        for_save_in = in_packet
        for_save_out = out_packet

        stat_key = node_name + '_internal'

        if stat_key not in pre_stat:
            # first sample for this node: nothing to compare with
            status = '-'
            vxlan_json = build_report(0, -1, -1)
        elif status == 'ok':
            prev = pre_stat[stat_key]
            in_packet = in_packet - int(prev['in_packet'])
            out_packet = out_packet - int(prev['out_packet'])

            if in_packet == 0 and out_packet == 0:
                ratio = 100
            elif in_packet <= 0 or out_packet < 0:
                LOG.info('Internal Traffic Ratio Fail.')
                ratio = 0
            else:
                ratio = float(out_packet) * 100 / in_packet

            LOG.info('Internal Traffic Ratio = ' + str(ratio))
            desc = 'Internal Traffic Ratio = ' + str(ratio) + '(' + str(out_packet) + '/' + str(in_packet) + ')'

            if ratio < float(CONF.alarm()['internal_traffic_ratio']):
                status = 'nok'

            vxlan_json = build_report(format(ratio, '.2f'), in_packet, out_packet)
        else:
            # The counters could not be read although a previous sample
            # exists.  A report is still required for the DB update and for
            # the reason list returned to the caller.
            vxlan_json = build_report(0, -1, -1)

        pre_stat[stat_key] = {'in_packet': for_save_in, 'out_packet': for_save_out}

        try:
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET internal_traffic = \"' + str(vxlan_json) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE INTERNAL TRAFFIC INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] INTERNAL TRAFFIC DB Update Fail.')
        except Exception:
            LOG.exception()
    except Exception:
        LOG.exception()
        status = 'fail'

    if not status == 'ok':
        if vxlan_json is None:
            # the check aborted before any report was built
            vxlan_json = build_report(0, -1, -1)
        reason_list.append(vxlan_json)

    return status, pre_stat, reason_list
Esempio n. 26
0
def get_gw_ratio_compute(conn, db_log, node_ip, node_name, pre_stat):
    """Check how a compute node spreads its traffic over the gateways.

    The node is located in the ``nodelist`` column of ``DB.ONOS_TBL`` by its
    management IP, its ``of_id`` is read from ``DB.OPENSTACK_TBL`` and the
    ``packets=`` counters of the matching lines of the ONOS ``groups``
    output are collected.  Each counter's share of the total, relative to
    the sample stored under ``pre_stat[node_name + '_GW']``, is compared
    with ``CONF.alarm()['gw_ratio']``.  The result is written to the
    ``gw_ratio`` column of ``DB.OPENSTACK_TBL``.

    Args:
        conn: DB connection used for the lookups and the update.
        db_log: logger that receives the generated UPDATE statement.
        node_ip: management IP of the node.
        node_name: name of the node being evaluated.
        pre_stat: mapping holding the previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason)``.  status is 'ok', 'nok', 'fail' or
        '-' (no usable previous sample).  reason contains the report when
        status is not 'ok', except for the early 'fail' returns taken when
        the ONOS list, the hostname or the of_id cannot be found, where it
        is empty.
    """
    status = 'ok'
    reason = []
    json_ratio = None

    try:
        sql = 'SELECT ' + DB.ONOS_TBL + '.nodename, nodelist, ip_addr' + ' FROM ' + DB.ONOS_TBL + \
                ' INNER JOIN ' + DB.NODE_INFO_TBL + ' ON ' + DB.ONOS_TBL + '.nodename = ' + DB.NODE_INFO_TBL + '.nodename'

        nodes_info = conn.cursor().execute(sql).fetchall()

        if not nodes_info:
            LOG.info('Fail to load onos list')
            return 'fail', pre_stat, reason

        manage_ip = ''
        hostname = ''
        for nodename, nodelist, ip in nodes_info:
            if not nodelist == 'none':
                # NOTE(security): nodelist is read from the DB and passed to
                # eval(); ast.literal_eval would be safer if the column only
                # ever holds literals -- confirm before switching.
                for node_info in eval(nodelist):
                    try:
                        if dict(node_info)['management_ip'] == node_ip:
                            manage_ip = ip
                            hostname = dict(node_info)['hostname']
                    except Exception:
                        manage_ip = ''

                    if not manage_ip == '':
                        break
            if not manage_ip == '':
                break

        if hostname == '':
            LOG.info('Can not find hostname')
            return 'fail', pre_stat, reason

        try:
            sql = 'SELECT of_id FROM ' + DB.OPENSTACK_TBL + ' WHERE hostname = \'' + str(hostname) + '\''
            LOG.info(sql)
            node_info = conn.cursor().execute(sql).fetchone()

            of_id = node_info[0]
        except Exception:
            LOG.exception()
            LOG.info('Can not find of_id')
            return 'fail', pre_stat, reason

        group_rt = SshCommand.onos_ssh_exec(manage_ip, 'groups')

        total_cnt = 0
        gw_list = []
        if group_rt is not None:
            for line in group_rt.splitlines():
                if of_id in line:
                    for col in line.split(','):
                        if 'packets=' in col:
                            packets = int(col.split('=')[1])
                            total_cnt = total_cnt + packets
                            gw_list.append(packets)

        stat_key = node_name + '_GW'

        # A previous sample with fewer counters than the current one cannot
        # be compared index by index.  Treat it like a missing sample so the
        # baseline is refreshed instead of failing on every later poll.
        if stat_key not in pre_stat or len(pre_stat[stat_key]['gw_list']) < len(gw_list):
            status = '-'
            json_ratio = {'ratio': '-', 'status': status, 'period':CONF.watchdog()['interval']}
        else:
            prev = pre_stat[stat_key]
            cur_total = total_cnt - prev['gw_total']
            ratios = []

            for gw, prev_gw in zip(gw_list, prev['gw_list']):
                cur_gw = gw - prev_gw

                LOG.info('cur_gw = ' + str(cur_gw))
                LOG.info('cur_total = ' + str(cur_total))

                if cur_gw == 0 and cur_total == 0:
                    # no traffic at all: report an even split
                    ratio = 100/len(gw_list)
                elif cur_gw <= 0 or cur_total <= 0:
                    ratio = 0
                else:
                    ratio = float(cur_gw) * 100 / cur_total

                ratios.append(str(ratio))

                if ratio < float(CONF.alarm()['gw_ratio']):
                    status = 'nok'

            str_ratio = ':'.join(ratios)
            json_ratio = {'ratio': str_ratio, 'status': status, 'period':CONF.watchdog()['interval']}
            LOG.info('[COMPUTE] ' + 'GW_RATIO = ' + str_ratio)

        try:
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET gw_ratio = \"' + str(json_ratio) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE TRAFFIC GW INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] TRAFFIC GW DB Update Fail.')
        except Exception:
            LOG.exception()

        # the raw counters become the baseline of the next poll
        pre_stat[stat_key] = {'gw_list': gw_list, 'gw_total': total_cnt}

    except Exception:
        LOG.exception()
        status = 'fail'

    if not status == 'ok':
        if json_ratio is None:
            # the check aborted before any report was built
            json_ratio = {'ratio': '-', 'status': status, 'period':CONF.watchdog()['interval']}
        reason.append(json_ratio)

    return status, pre_stat, reason
Esempio n. 27
0
def get_gw_ratio_gateway(conn, db_log, node_ip, node_name, rx, gw_rx_sum, pre_stat):
    """Check the share of gateway traffic received by one gateway node.

    The node's data IP is looked up in the ``nodelist`` column of
    ``DB.ONOS_TBL`` by its management IP, and the ``packets=`` counters of
    the ONOS ``groups`` lines whose ``tunnelDst`` is that data IP are
    summed.  Relative to the sample stored under
    ``pre_stat[node_name + '_GW']``, the ratio ``rx / gw_rx_sum`` is
    compared with ``CONF.alarm()['gw_ratio']``, and a packet loss is
    reported when fewer packets were received than the groups counted.
    The result is written to the ``gw_ratio`` column of
    ``DB.OPENSTACK_TBL``.

    Args:
        conn: DB connection used for the lookups and the update.
        db_log: logger that receives the generated UPDATE statement.
        node_ip: management IP of the node.
        node_name: name of the node being evaluated.
        rx: rx counter of this node for this poll.
        gw_rx_sum: counter used as the denominator of the ratio (presumably
            the rx sum over all gateways -- verify against the caller).
        pre_stat: mapping holding the previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason)``.  status is 'ok', 'nok', 'fail' or
        '-' (no previous sample yet).  reason contains the report when
        status is not 'ok', except for the early 'fail' returns taken when
        the ONOS list or the data IP cannot be found, where it is empty.
    """
    status = 'ok'
    reason = []
    json_ratio = None

    try:
        sql = 'SELECT ' + DB.ONOS_TBL + '.nodename, nodelist, ip_addr' + ' FROM ' + DB.ONOS_TBL + \
                ' INNER JOIN ' + DB.NODE_INFO_TBL + ' ON ' + DB.ONOS_TBL + '.nodename = ' + DB.NODE_INFO_TBL + '.nodename'

        nodes_info = conn.cursor().execute(sql).fetchall()

        if not nodes_info:
            LOG.info('Fail to load onos list')
            return 'fail', pre_stat, reason

        # search data_ip
        data_ip = ''
        manage_ip = ''
        cpt_to_gw_packet = 0
        for nodename, nodelist, ip in nodes_info:
            if not nodelist == 'none':
                # NOTE(security): nodelist is read from the DB and passed to
                # eval(); ast.literal_eval would be safer if the column only
                # ever holds literals -- confirm before switching.
                for node_info in eval(nodelist):
                    try:
                        if dict(node_info)['management_ip'] == node_ip:
                            manage_ip = ip
                            data_ip = dict(node_info)['data_ip']
                    except Exception:
                        manage_ip = ''

                    if not manage_ip == '':
                        break
            if not manage_ip == '':
                break

        if data_ip == '':
            LOG.info('Can not find data ip')
            return 'fail', pre_stat, reason

        group_rt = SshCommand.onos_ssh_exec(manage_ip, 'groups')

        if group_rt is not None:
            for line in group_rt.splitlines():
                if '{tunnelDst=' + data_ip + '}' in line:
                    for col in line.split(','):
                        if 'packets=' in col:
                            cpt_to_gw_packet = cpt_to_gw_packet + int(col.split('=')[1])

        stat_key = node_name + '_GW'

        if stat_key not in pre_stat:
            # first sample for this node: nothing to compare with
            status = '-'
            json_ratio = {'current_rx': '-', 'current_compute_tx': '-', 'current_total': '-',
                          'ratio': '-',
                          'period': CONF.watchdog()['interval'], 'status': status, 'packet_loss': False,
                          'description': ''}
        else:
            prev = pre_stat[stat_key]
            cur_rx = rx - int(prev['rx'])
            cur_total = gw_rx_sum - int(prev['gw_rx_sum'])
            cur_packet = cpt_to_gw_packet - int(prev['cpt_to_gw_packet'])

            if cur_rx == 0 and cur_total == 0:
                ratio = 100
            elif cur_rx <= 0 or cur_total <= 0:
                # cur_total is the divisor below: a zero total with a
                # positive cur_rx must not reach the division
                ratio = 0
            else:
                ratio = float(cur_rx) * 100 / cur_total

            desc = 'GW RATIO = ' + str(ratio) + ' (' + str(cur_rx) + ' / ' + str(cur_total) + ')'

            # fewer packets received than the compute nodes sent here
            loss_flag = cur_rx < cur_packet
            if loss_flag:
                LOG.info('GW Ratio Fail. (Data loss)')

            LOG.info('GW Ratio = ' + str(ratio))

            if ratio < float(CONF.alarm()['gw_ratio']) or loss_flag:
                status = 'nok'

            json_ratio = {'current_rx': cur_rx, 'current_compute_tx': cur_packet, 'current_total': cur_total, 'ratio': format(ratio, '.2f'),
                          'period':CONF.watchdog()['interval'], 'status': status, 'packet_loss': loss_flag, 'description': desc}

        try:
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET gw_ratio = \"' + str(json_ratio) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE TRAFFIC GW INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] TRAFFIC GW DB Update Fail.')
        except Exception:
            LOG.exception()

        # the raw counters become the baseline of the next poll
        pre_stat[stat_key] = {'rx': rx, 'gw_rx_sum': gw_rx_sum, 'cpt_to_gw_packet': cpt_to_gw_packet}
    except Exception:
        LOG.exception()
        status = 'fail'

    if not status == 'ok':
        if json_ratio is None:
            # the check aborted before any report was built
            json_ratio = {'current_rx': '-', 'current_compute_tx': '-', 'current_total': '-',
                          'ratio': '-',
                          'period': CONF.watchdog()['interval'], 'status': status, 'packet_loss': False,
                          'description': ''}
        reason.append(json_ratio)

    return status, pre_stat, reason
Esempio n. 28
0
def onos_rest_check(conn, db_log, node_name, node_ip):
    """Check the web applications reported by ``web:list`` on an ONOS node.

    Every name in ``CONF.onos()['rest_list']`` found in the output is
    recorded with ``monitor_item`` True; it is 'ok' only when its line
    contains both 'Active' and 'Deployed'.  Every other listed application
    is recorded with ``monitor_item`` False.  The resulting list (or the
    string 'fail' when the command returned nothing) is written to the
    ``weblist`` column of ``DB.ONOS_TBL``.

    Returns ``(web_status, fail_reason)``: web_status is 'ok', 'nok' or
    'fail', and fail_reason holds the monitored entries that are 'nok'.
    """
    web_status = 'ok'
    web_list = []
    fail_reason = []

    try:
        listing = SshCommand.onos_ssh_exec(node_ip, 'web:list')

        if listing is None:
            LOG.error("\'%s\' ONOS Rest Check Error", node_ip)
            web_status = 'fail'
            web_list = 'fail'
        else:
            # first pass: the applications that must be monitored
            for web in CONF.onos()['rest_list']:
                for row in listing.splitlines():
                    # header and separator rows
                    if row.startswith(('ID', '--')):
                        continue

                    if ' ' + web + ' ' not in row:
                        continue

                    healthy = 'Active' in row and 'Deployed' in row
                    rest_json = {
                        'name': web,
                        'status': 'ok' if healthy else 'nok',
                        'monitor_item': True
                    }
                    if not healthy:
                        fail_reason.append(rest_json)
                        web_status = 'nok'

                    web_list.append(rest_json)

            # second pass: everything else, reported but not monitored
            for row in listing.splitlines():
                if row.startswith(('ID', '--')):
                    continue

                # the name is the eleventh whitespace-separated column
                name = row.split()[10]

                if name in CONF.onos()['rest_list']:
                    continue

                healthy = 'Active' in row and 'Deployed' in row
                web_list.append({
                    'name': name,
                    'status': 'ok' if healthy else 'nok',
                    'monitor_item': False
                })

        try:
            sql = ('UPDATE ' + DB.ONOS_TBL +
                   ' SET weblist = \"' + str(web_list) + '\"' +
                   ' WHERE nodename = \'' + node_name + '\'')
            db_log.write_log('----- UPDATE ONOS REST INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS REST DB Update Fail.')
        except:
            LOG.exception()

    except:
        LOG.exception()
        web_status = 'fail'

    return web_status, fail_reason
Esempio n. 29
0
def xos_sync_check(conn, db_log, node_name):
    """Check the XOS synchronizers through the ``/api/core/diags/`` REST call.

    The endpoint is fetched with ``curl`` using the server and account from
    ``CONF.xos()``.  A synchronizer is 'ok' when the code before the first
    '-' of its ``backend_status`` is '0' or '1' and its last run is less
    than 30 seconds old.  After each synchronizer the list collected so far
    is written to the ``synchronizer`` column of ``DB.XOS_TBL``.

    Returns ``(swarm_sync, fail_reason)``: swarm_sync is 'ok', 'nok' or
    'fail', and fail_reason holds the 'nok' entries.  When the ``curl``
    command itself fails the result is ``('fail', None)``.
    """
    swarm_sync = 'ok'
    sync_list = []
    fail_reason = []

    try:
        url = CONF.xos()['xos_rest_server']
        account = CONF.xos()['xos_rest_account']

        cmd = 'curl -H "Accept: application/json; indent=4" -u ' + account + ' -X GET ' + url + '/api/core/diags/'
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = proc.communicate()

        if proc.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return 'fail', None

        for xos_info in json.loads(output):
            backend_status = xos_info['backend_status']

            LOG.info('xos_sync_backend_status = ' + backend_status)

            # "<code> - <description>"
            parts = str(backend_status).split('-')
            status = 'ok' if parts[0].strip() in ['0', '1'] else 'nok'

            # check time
            last_run = json.loads(xos_info['backend_register'])['last_run']
            interval = int(time.time() - last_run)

            if interval >= 30:
                status = 'nok'

            xos_json = {
                'name': xos_info['name'],
                'status': status,
                'description': parts[1].strip(),
                'last_run_interval': interval
            }
            sync_list.append(xos_json)

            if status == 'nok':
                swarm_sync = 'nok'
                fail_reason.append(xos_json)

            try:
                sql = ('UPDATE ' + DB.XOS_TBL +
                       ' SET synchronizer = \"' + str(sync_list) + '\"' +
                       ' WHERE nodename = \'' + node_name + '\'')
                db_log.write_log('----- UPDATE SYNCHRONIZER INFO -----\n' +
                                 sql)

                if DB.sql_execute(sql, conn) != 'SUCCESS':
                    db_log.write_log('[FAIL] SYNCHRONIZER DB Update Fail.')
            except:
                LOG.exception()

    except:
        LOG.exception()
        swarm_sync = 'fail'

    return swarm_sync, fail_reason
Esempio n. 30
0
def _onos_check_cluster(node_ip):
    """Check the ONOS cluster members reported by node_ip against the config.

    Requests 'onos/v1/cluster' through onos_api_req() and matches the reply
    with the 'id:ip' entries of CONF.onos()['list'].

    :param node_ip: address handed to onos_api_req()
    :return: (node_list, status, fail_reason); status is 'ok', 'nok' or
             'fail', fail_reason is a list of message strings
    """
    node_list = []
    status = 'ok'
    fail_reason = []

    _, rsp = onos_api_req(node_ip, 'onos/v1/cluster')
    if rsp is None:
        # NOTE(review): unlike the device/app checks, a missing response is
        # not reported as 'fail' here; kept as-is, confirm this is intended.
        return node_list, status, fail_reason

    try:
        # nodes reported by ONOS, keyed by ip
        node_tbl = dict()
        for node in rsp['nodes']:
            node_tbl[node['ip']] = node

        for onos_node in CONF.onos()['list']:
            fields = onos_node.split(':')
            if len(fields) != 2:
                continue
            node_id, ip = fields
            if node_id == '' or ip == '':
                continue

            if ip in node_tbl:
                node = node_tbl.pop(ip)
                node['id'] = node_id
                node['monitor_item'] = True
                if node['status'] != 'READY':
                    status = 'nok'
                    fail_reason.append('Node ' + node_id + ' DOWN')
            else:
                # configured but not reported by ONOS at all
                node = {
                    'id': node_id,
                    'ip': ip,
                    'status': 'nok',
                    'monitor_item': True
                }
                status = 'nok'
                fail_reason.append('Node ' + node_id + ' DOWN')
            node_list.append(node)

        # whatever is left was reported by ONOS but is not configured
        for node in node_tbl.values():
            node['monitor_item'] = False
            node_list.append(node)

    except Exception:
        LOG.exception()
        LOG.error("\'%s\' ONOS Check Error(nodes)", node_ip)
        status = 'fail'

    return node_list, status, fail_reason


def _onos_check_devices(node_ip):
    """Check the devices reported by node_ip against CONF.onos()['device_list'].

    Requests 'onos/v1/devices' through onos_api_req(). Every reported device
    gets its 'id' rewritten to 'of:' + chassisId left-padded to 16 chars
    with '0' before it is matched with the configured ids.

    :param node_ip: address handed to onos_api_req()
    :return: (device_list, status, fail_reason); status is 'ok', 'nok' or
             'fail', fail_reason is a list of message strings
    """
    device_list = []
    status = 'ok'
    fail_reason = []

    _, rsp = onos_api_req(node_ip, 'onos/v1/devices')
    if rsp is None:
        LOG.error("\'%s\' ONOS Check Error(devices)", node_ip)
        return device_list, 'fail', fail_reason

    try:
        device_tbl = dict()
        for device in rsp['devices']:
            device['id'] = 'of:' + device['chassisId'].rjust(16, '0')
            device_tbl[device['id']] = device

        for device_id in CONF.onos()['device_list']:
            if device_id == '':
                # empty entry, nothing configured
                continue

            if device_id in device_tbl:
                device = device_tbl.pop(device_id)
                device['monitor_item'] = True
                if not device['available']:
                    status = 'nok'
                    fail_reason.append('Device ' + device_id + ' DOWN')
            else:
                # configured but not reported by ONOS at all
                device = {
                    'id': device_id,
                    'available': False,
                    'channelId': '-',
                    'name': '-',
                    'role': '-',
                    'monitor_item': True
                }
                status = 'nok'
                fail_reason.append('Device ' + device_id + ' DOWN')
            device_list.append(device)

        # whatever is left was reported by ONOS but is not configured
        for device in device_tbl.values():
            device['monitor_item'] = False
            device_list.append(device)

    except Exception:
        LOG.exception()
        LOG.error("\'%s\' ONOS Check Error(devices)", node_ip)
        status = 'fail'

    return device_list, status, fail_reason


def _onos_check_links(node_ip):
    """Check the links reported by node_ip against CONF.onos()['link_list'].

    Requests 'onos/v1/links' through onos_api_req(). Reported links are
    keyed as '<src device>/<src port>-<dst device>/<dst port>'; each
    configured id is checked in both directions (id and its reverse).

    :param node_ip: address handed to onos_api_req()
    :return: (link_list, status, fail_reason); status is 'ok', 'nok' or
             'fail', fail_reason is a list of message strings
    """
    link_list = []
    status = 'ok'
    fail_reason = []

    _, rsp = onos_api_req(node_ip, 'onos/v1/links')
    if rsp is None:
        # NOTE(review): unlike the device/app checks, a missing response is
        # not reported as 'fail' here; kept as-is, confirm this is intended.
        return link_list, status, fail_reason

    try:
        link_tbl = dict()
        for link in rsp['links']:
            # flatten the src/dst objects into 'device/port' strings
            link['src'] = link['src']['device'] + '/' + link['src']['port']
            link['dst'] = link['dst']['device'] + '/' + link['dst']['port']
            link_tbl[link['src'] + '-' + link['dst']] = link

        for conf_id in CONF.onos()['link_list']:
            if conf_id == '':
                continue

            ends = conf_id.split('-')
            if len(ends) != 2:
                link_list.append({
                    'src': conf_id,
                    'dst': '(invalid_link_config)',
                    'expected': 'false',
                    'state': '-',
                    'type': "-",
                    'monitor_item': True
                })
                status = 'nok'
                fail_reason.append(
                    'Link ' + conf_id + ' is configed as INVALID ID FORMAT')
                continue

            # a configured link is expected in both directions:
            # first the id as configured, then the reversed one
            rev_id = ends[1] + '-' + ends[0]
            for link_id in (conf_id, rev_id):
                if link_id in link_tbl:
                    link = link_tbl[link_id]
                    link['monitor_item'] = True
                    if link['state'] != 'ACTIVE':
                        status = 'nok'
                        fail_reason.append('Link ' + link_id + ' DOWN')
                    link_list.append(link)
                    link_tbl.pop(link_id)
                else:
                    # configured but not reported by ONOS at all
                    src, dst = link_id.split('-')
                    link_list.append({
                        'src': src,
                        'dst': dst,
                        'expected': 'false',
                        'state': '-',
                        'type': "-",
                        'monitor_item': True
                    })
                    status = 'nok'
                    fail_reason.append('Link ' + link_id + ' DOWN')

        # whatever is left was reported by ONOS but is not configured
        for link in link_tbl.values():
            link['monitor_item'] = False
            link_list.append(link)

    except Exception:
        LOG.exception()
        LOG.error("\'%s\' ONOS Check Error(links)", node_ip)
        status = 'fail'

    return link_list, status, fail_reason


def _onos_check_apps(node_ip):
    """Check the active applications of node_ip against CONF.onos()['app_list'].

    Requests 'onos/v1/applications' through onos_api_req(). Only apps whose
    state is 'ACTIVE' are considered; 'org.onosproject.' is stripped from
    their names before matching.

    :param node_ip: address handed to onos_api_req()
    :return: (app_list, status, fail_reason); status is 'ok', 'nok' or
             'fail', fail_reason is a list of the failing app entries (dicts)
    """
    app_list = []
    status = 'ok'
    fail_reason = []

    _, rsp = onos_api_req(node_ip, 'onos/v1/applications')
    if rsp is None:
        LOG.error("\'%s\' ONOS Check Error(apps)", node_ip)
        return app_list, 'fail', fail_reason

    try:
        active_apps = [
            app_rsp['name'].replace('org.onosproject.', '')
            for app_rsp in rsp['applications']
            if app_rsp['state'] == 'ACTIVE'
        ]

        for app in CONF.onos()['app_list']:
            if app in active_apps:
                app_json = {
                    'name': app,
                    'status': 'ok',
                    'monitor_item': True
                }
                active_apps.remove(app)
            else:
                app_json = {
                    'name': app,
                    'status': 'nok',
                    'monitor_item': True
                }
                status = 'nok'
                fail_reason.append(app_json)
            app_list.append(app_json)

        # whatever is left is active in ONOS but is not configured
        for app in active_apps:
            app_list.append({
                'name': app,
                'status': 'ok',
                'monitor_item': False
            })

    except Exception:
        LOG.exception()
        LOG.error("\'%s\' ONOS Check Error(apps)", node_ip)
        status = 'fail'

    return app_list, status, fail_reason


def onos_check(conn, db_log, node_name, node_ip):
    """Check cluster, devices, links and apps of one ONOS node and store them.

    Called on each ONOS node in NODE_INFO_TBL. The four item lists are
    written, as their str() form, to the cluster/device/link/app columns of
    DB.ONOS_TBL for the row whose nodename equals node_name.

    :param conn: DB connection handed to DB.sql_execute()
    :param db_log: logger whose write_log() records the SQL and DB failures
    :param node_name: value used in the WHERE nodename = ... clause
    :param node_ip: address handed to onos_api_req()
    :return: (node_status, device_status, link_status, app_status,
              node_fail_reason, device_fail_reason, link_fail_reason,
              app_fail_reason); every status is 'ok', 'nok' or 'fail'
    """
    # Bind every returned name up front so the final return cannot hit an
    # unbound local when something fails early inside the try block.
    node_list, device_list, link_list, app_list = [], [], [], []
    node_status = device_status = link_status = app_status = 'ok'
    node_fail_reason = []
    device_fail_reason = []
    link_fail_reason = []
    app_fail_reason = []

    try:
        node_list, node_status, node_fail_reason = \
            _onos_check_cluster(node_ip)
        device_list, device_status, device_fail_reason = \
            _onos_check_devices(node_ip)
        link_list, link_status, link_fail_reason = \
            _onos_check_links(node_ip)
        app_list, app_status, app_fail_reason = \
            _onos_check_apps(node_ip)

        # store to db
        try:
            # NOTE(review): the SQL is assembled by string concatenation and
            # the lists contain text received from ONOS; a value containing
            # a double quote would break the statement. Switch to bound
            # parameters if DB.sql_execute() supports them.
            sql = ('UPDATE ' + DB.ONOS_TBL +
                   ' SET ' +
                   ' cluster = \"' + str(node_list) + '\",' +
                   ' device = \"' + str(device_list) + '\",' +
                   ' link = \"' + str(link_list) + '\",' +
                   ' app = \"' + str(app_list) + '\"' +
                   ' WHERE nodename = \'' + node_name + '\'')
            db_log.write_log('----- UPDATE ONOS CONNECTION INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS CONNECTION DB Update Fail.')
        except Exception:
            # a DB problem must not change the check result
            LOG.exception()

    except Exception:
        LOG.exception()
        node_status = 'fail'
        device_status = 'fail'
        link_status = 'fail'
        app_status = 'fail'

    return node_status, device_status, link_status, app_status, node_fail_reason, device_fail_reason, link_fail_reason, app_fail_reason