Ejemplo n.º 1
0
def get_ha_stats(ha_dic):
    """Summarise HA proxy statistics into a node status and a success ratio.

    ``ha_dic`` maps a key to a list of stat rows.  Every row must provide
    ``name`` and ``node_sts``; rows named ``FRONTEND`` also provide
    ``req_count`` and all other rows provide ``succ_count``.

    A ``FRONTEND`` row is healthy when its status contains ``OPEN``, any
    other row when it contains ``UP``.  The ratio is the sum of every
    ``succ_count`` against the ``req_count`` of the last ``FRONTEND`` row
    seen, in percent, compared with ``CONF.alarm()['ha_proxy']``.

    Returns:
        ``(ha_status, ha_ratio, list_reason, ratio_reason)``.  Both status
        values are ``'ok'``, ``'nok'`` or, when anything raised (including a
        zero request count), ``'fail'``.  ``list_reason`` holds one dict per
        unhealthy row, ``ratio_reason`` the formatted ratio when it is below
        the threshold.
    """
    ha_status = 'ok'
    ha_ratio = 'ok'
    list_reason = []
    ratio_reason = []

    try:
        frontend = 0
        backend = 0

        for key in dict(ha_dic):
            for line in ha_dic[key]:
                row = dict(line)
                host = row['name']
                status = row['node_sts']

                is_frontend = host == 'FRONTEND'
                healthy_mark = 'OPEN' if is_frontend else 'UP'

                if healthy_mark not in status:
                    list_reason.append({'key': key, 'hostname': host, 'status': 'nok'})
                    ha_status = 'nok'

                if is_frontend:
                    # Overwritten, not accumulated: only the last frontend counts.
                    frontend = int(row['req_count'])
                else:
                    backend += int(row['succ_count'])

        ratio = float(backend) * 100 / frontend

        if ratio < float(CONF.alarm()['ha_proxy']):
            ha_ratio = 'nok'
            ratio_reason.append(format(ratio, '.2f'))
    except:
        LOG.exception()
        ha_status = 'fail'
        ha_ratio = 'fail'

    return ha_status, ha_ratio, list_reason, ratio_reason
Ejemplo n.º 2
0
def get_grade(item, value):
    """Translate a measured value into an alarm grade for ``item``.

    ``CONF.alarm()[item]`` must unpack into three thresholds in the order
    critical, major, minor.  The string ``'-1'`` yields ``'fail'``; otherwise
    the first threshold that ``value`` reaches (checked from critical down to
    minor) names the grade, and ``'normal'`` is returned when none is reached.
    """
    critical, major, minor = CONF.alarm()[item]

    if value == '-1':
        return 'fail'

    ordered_limits = (
        ('critical', critical),
        ('major', major),
        ('minor', minor),
    )

    for grade, limit in ordered_limits:
        if float(value) >= float(limit):
            return grade

    return 'normal'
Ejemplo n.º 3
0
def get_internal_traffic(conn, db_log, node_name, node_ip, user_name, sub_type, rx_count, patch_tx, pre_stat):
    """Compute the internal traffic ratio of one node and store the report.

    For a ``COMPUTE`` node the counters are parsed from the output of
    ``ovs-ofctl dump-flows br-int`` fetched over SSH; for every other
    ``sub_type`` the supplied ``rx_count`` and ``patch_tx`` are used as is.
    The increments since the previous sample (kept in ``pre_stat`` under
    ``node_name + '_internal'``) give the ratio ``out / in`` in percent,
    which is compared with ``CONF.alarm()['internal_traffic_ratio']``.
    The report is written to the ``internal_traffic`` column of
    ``DB.OPENSTACK_TBL`` for this node.

    Args:
        conn: database connection handed to ``DB.sql_execute``.
        db_log: logger exposing ``write_log``.
        node_name: node whose row is updated and whose baseline is read.
        node_ip, user_name: SSH target for the flow dump (COMPUTE only).
        sub_type: ``'COMPUTE'`` selects the flow-dump path.
        rx_count: received packet counter supplied by the caller.
        patch_tx: transmit counter for non-COMPUTE nodes; ``-1`` means the
            caller could not read it.
        pre_stat: mutable mapping of previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason_list)``.  ``status`` is ``'ok'``,
        ``'nok'`` (ratio below threshold), ``'fail'`` (counters unavailable
        or an error occurred) or ``'-'`` (no previous sample yet).
        ``reason_list`` contains the report whenever ``status`` is not
        ``'ok'``.
    """
    status = 'ok'
    reason_list = []
    desc = ''
    port_json = {}
    # Stays None until a report exists, so the failure path can still return one
    # instead of raising NameError on an unbound local.
    vxlan_json = None

    try:
        in_packet = 0
        out_packet = 0

        if sub_type == 'COMPUTE':
            flow_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo ovs-ofctl -O OpenFlow13 dump-flows br-int')

            inport_cnt = 0
            gw_cnt = 0
            output_cnt = 0

            if flow_rt is not None:
                for line in flow_rt.splitlines():
                    # Only matching lines are parsed; the 4th comma-separated
                    # field is presumably "n_packets=<count>" - TODO confirm.
                    if 'in_port' in line:
                        inport_cnt += int(line.split(',')[3].split('=')[1])
                    elif 'output' in line:
                        output_cnt += int(line.split(',')[3].split('=')[1])
                    elif 'actions=group' in line:
                        gw_cnt += int(line.split(',')[3].split('=')[1])

                in_packet = inport_cnt + rx_count
                out_packet = gw_cnt + output_cnt

                port_json = {'vm_tx': inport_cnt, 'vxlan_rx': rx_count, 'out_gw': gw_cnt, 'output': output_cnt}
            else:
                port_json = {'vm_tx': -1, 'vxlan_rx': -1, 'out_gw': -1, 'output': -1}
                status = 'fail'
        else:
            port_json = {'vxlan_rx': rx_count, 'patch-integ': patch_tx}

            if patch_tx == -1:
                status = 'fail'
            else:
                in_packet = rx_count
                out_packet = patch_tx

        # Absolute counters become the baseline of the next run.
        for_save_in = in_packet
        for_save_out = out_packet

        stat_key = node_name + '_internal'
        known = dict(pre_stat)
        period = CONF.watchdog()['interval']
        threshold = CONF.alarm()['internal_traffic_ratio']

        if stat_key not in known:
            # First sample: nothing to compare against.
            status = '-'

        if status == 'ok':
            previous = known[stat_key]
            in_packet = in_packet - int(previous['in_packet'])
            out_packet = out_packet - int(previous['out_packet'])

            if in_packet == 0 and out_packet == 0:
                ratio = 100
            elif in_packet <= 0 or out_packet < 0:
                LOG.info('Internal Traffic Ratio Fail.')
                ratio = 0
            else:
                ratio = float(out_packet) * 100 / in_packet

            LOG.info('Internal Traffic Ratio = ' + str(ratio))
            desc = 'Internal Traffic Ratio = ' + str(ratio) + '(' + str(out_packet) + '/' + str(in_packet) + ')'

            if ratio < float(threshold):
                status = 'nok'

            vxlan_json = {'port_stat_in_out': port_json, 'period': period,
                          'ratio': format(ratio, '.2f'), 'current_rx': in_packet, 'current_tx': out_packet,
                          'description': desc, 'threshold': threshold, 'status': status}
        else:
            # No baseline ('-') or counters unavailable ('fail'): placeholder report.
            vxlan_json = {'port_stat_in_out': port_json, 'period': period,
                          'ratio': 0, 'current_rx': -1, 'current_tx': -1,
                          'description': desc, 'threshold': threshold, 'status': status}

        pre_stat[stat_key] = {'in_packet': for_save_in, 'out_packet': for_save_out}

        try:
            # NOTE(review): the statement is assembled by string concatenation;
            # switch to bound parameters if DB.sql_execute supports them.
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET internal_traffic = \"' + str(vxlan_json) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE INTERNAL TRAFFIC INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] INTERNAL TRAFFIC DB Update Fail.')
        except Exception:
            LOG.exception()
    except Exception:
        LOG.exception()
        status = 'fail'

    if status != 'ok':
        if vxlan_json is None:
            # The failure happened before a report could be built.
            vxlan_json = {'port_stat_in_out': port_json, 'ratio': 0,
                          'current_rx': -1, 'current_tx': -1,
                          'description': desc, 'status': status}
        reason_list.append(vxlan_json)

    return status, pre_stat, reason_list
Ejemplo n.º 4
0
def get_node_traffic(conn, db_log, node_name, rx_dic, tx_dic, total_rx, total_tx, err_info, pre_stat):
    """Evaluate the VXLAN traffic of one node and store the report.

    The increments of ``total_rx`` / ``total_tx`` since the previous sample
    (kept in ``pre_stat`` under ``node_name + '_VXLAN'``) give the ratio
    ``rx / tx`` in percent, which is compared with
    ``CONF.alarm()['node_traffic_ratio']``.  The node is also marked
    ``'nok'`` when the received increment is below the increment of the
    packet count read from the ONOS ``flows`` command for this node's data
    IP, or when any drop/error counter in ``err_info`` increased.  The
    report is written to the ``vxlan_traffic`` column of
    ``DB.OPENSTACK_TBL``.

    Args:
        conn: database connection (``cursor().execute(...)`` must be chainable).
        db_log: logger exposing ``write_log``.
        node_name: node being checked.
        rx_dic, tx_dic: per-node counters, indexed by ``node_name``.
        total_rx, total_tx: absolute totals used for the ratio.
        err_info: mapping with ``rx_drop``, ``rx_err``, ``tx_drop``, ``tx_err``.
        pre_stat: mutable mapping of previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason_list)``.  ``status`` is ``'ok'``,
        ``'nok'``, ``'fail'`` or ``'-'`` (no previous sample yet).
        ``reason_list`` contains the report whenever ``status`` is not
        ``'ok'``.
    """
    status = 'ok'
    reason_list = []
    # Bound before the try so the report below can always be built, even when
    # the very first query raises.
    ratio = -1
    port_json = {}
    description = ''

    try:
        pre_total_rx = total_rx
        pre_total_tx = total_tx

        # check minimum packet count
        sql = 'SELECT data_ip FROM ' + DB.OPENSTACK_TBL + ' WHERE nodename = \'' + node_name + '\''
        data_ip = conn.cursor().execute(sql).fetchone()[0]

        sql = 'SELECT ip_addr FROM ' + DB.NODE_INFO_TBL + ' WHERE type = \'ONOS\''
        nodes_info = conn.cursor().execute(sql).fetchall()

        min_rx = 0
        if len(nodes_info) == 0:
            LOG.info('Fail to load onos list')
            status = 'fail'
        else:
            for ip in nodes_info:
                flows_rt = SshCommand.onos_ssh_exec(ip[0], '\"flows --filter \'{tunnelDst=' + data_ip + '}\' --short\"')

                if flows_rt is not None:
                    for line in flows_rt.splitlines():
                        if 'tunnelDst' in line:
                            min_rx = min_rx + int(line.split(',')[2].split('=')[1])
                    # The first controller that answers is enough.
                    break

        stat_key = node_name + '_VXLAN'
        known = dict(pre_stat)
        previous = None

        if stat_key not in known:
            status = '-'
            ratio = -1
        else:
            previous = known[stat_key]
            total_rx = total_rx - int(previous['total_rx'])
            total_tx = total_tx - int(previous['total_tx'])
            cur_min = min_rx - int(previous['min_rx'])

            if total_rx == 0 and total_tx == 0:
                ratio = 100
            elif total_tx <= 0 or total_rx < 0:
                # Denominator not positive or numerator negative (counter reset).
                LOG.info('Node Traffic Ratio Fail.')
                ratio = 0
            else:
                ratio = float(total_rx) * 100 / total_tx

        LOG.info('Node Traffic Ratio = ' + str(ratio))

        port_json = {'rx': rx_dic[node_name], 'minimum_rx': min_rx, 'rx_drop': err_info['rx_drop'], 'rx_errs': err_info['rx_err'],
                     'tx': tx_dic[node_name], 'tx_drop': err_info['tx_drop'], 'tx_errs': err_info['tx_err']}

        if previous is not None:
            description = 'Ratio of success for all nodes = ' + str(ratio) + ' (' + str(total_rx) + ' / ' + str(total_tx) + ')'

            if ratio < float(CONF.alarm()['node_traffic_ratio']):
                LOG.info('[NODE TRAFFIC] ratio nok')
                status = 'nok'

            if total_rx < cur_min:
                LOG.info('CUR_MIN_RX = ' + str(cur_min) + ', CUR_RX = ' + str(total_rx) + ', Less than rx minimum.')
                status = 'nok'

            # Any growth of a drop/error counter since the last sample is an alarm.
            for counter in ('rx_drop', 'rx_err', 'tx_drop', 'tx_err'):
                if err_info[counter] - int(previous[counter]) > 0:
                    LOG.info('[NODE TRAFFIC] ' + counter + ' nok')
                    status = 'nok'

        pre_stat[stat_key] = {
            'total_rx': pre_total_rx,
            'total_tx': pre_total_tx,
            'min_rx': min_rx,
            'rx_drop': err_info['rx_drop'],
            'rx_err': err_info['rx_err'],
            'tx_drop': err_info['tx_drop'],
            'tx_err': err_info['tx_err'],
        }
    except Exception:
        LOG.exception()
        status = 'fail'

    vxlan_json = {'port_stat_vxlan': port_json, 'period': CONF.watchdog()['interval'],
                  'ratio': format(ratio, '.2f'), 'current_rx': total_rx, 'current_tx': total_tx,
                  'description': description, 'threshold': CONF.alarm()['node_traffic_ratio'], 'status': status}

    try:
        # NOTE(review): the statement is assembled by string concatenation;
        # switch to bound parameters if DB.sql_execute supports them.
        sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
              ' SET vxlan_traffic = \"' + str(vxlan_json) + '\"' + \
              ' WHERE nodename = \'' + node_name + '\''
        db_log.write_log('----- UPDATE VXLAN STAT INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] VXLAN STAT DB Update Fail.')
    except Exception:
        LOG.exception()

    if status != 'ok':
        reason_list.append(vxlan_json)

    return status, pre_stat, reason_list
Ejemplo n.º 5
0
def get_gw_ratio_compute(conn, db_log, node_ip, node_name, pre_stat):
    """Check how a compute node spreads traffic over its gateway groups.

    The node's hostname and the controller address are looked up in the
    ``nodelist`` stored in ``DB.ONOS_TBL``, the node's ``of_id`` is read
    from ``DB.OPENSTACK_TBL`` and the ``packets=`` counters of every line of
    the ONOS ``groups`` output containing that ``of_id`` are collected.
    Each counter's increment since the previous sample (kept in ``pre_stat``
    under ``node_name + '_GW'``) is expressed as a percentage of the total
    increment and compared with ``CONF.alarm()['gw_ratio']``.  The report is
    written to the ``gw_ratio`` column of ``DB.OPENSTACK_TBL``.

    Args:
        conn: database connection (``cursor().execute(...)`` must be chainable).
        db_log: logger exposing ``write_log``.
        node_ip: management IP used to find the node in ``nodelist``.
        node_name: node whose row is updated and whose baseline is read.
        pre_stat: mutable mapping of previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason)``.  ``status`` is ``'ok'``, ``'nok'``,
        ``'fail'`` or ``'-'`` (no usable previous sample).  ``reason``
        contains the report when ``status`` is not ``'ok'``; it is empty for
        the early ``'fail'`` returns (no controller, hostname or of_id).
    """
    status = 'ok'
    reason = []
    # Stays None until a report exists, so the failure path cannot hit an
    # unbound local.
    json_ratio = None

    try:
        sql = 'SELECT ' + DB.ONOS_TBL + '.nodename, nodelist, ip_addr' + ' FROM ' + DB.ONOS_TBL + \
                ' INNER JOIN ' + DB.NODE_INFO_TBL + ' ON ' + DB.ONOS_TBL + '.nodename = ' + DB.NODE_INFO_TBL + '.nodename'

        nodes_info = conn.cursor().execute(sql).fetchall()

        if len(nodes_info) == 0:
            LOG.info('Fail to load onos list')
            return 'fail', pre_stat, reason

        manage_ip = ''
        hostname = ''
        for nodename, nodelist, ip in nodes_info:
            if not nodelist == 'none':
                # NOTE(review): eval() executes whatever text is stored in the
                # nodelist column; consider ast.literal_eval if the column only
                # ever holds Python literals.
                for node_info in eval(nodelist):
                    try:
                        if dict(node_info)['management_ip'] == node_ip:
                            manage_ip = ip
                            hostname = dict(node_info)['hostname']
                    except Exception:
                        # Malformed entry: forget the partial match and keep looking.
                        manage_ip = ''

                    if not manage_ip == '':
                        break
            if not manage_ip == '':
                break

        if hostname == '':
            LOG.info('Can not find hostname')
            return 'fail', pre_stat, reason

        try:
            sql = 'SELECT of_id FROM ' + DB.OPENSTACK_TBL + ' WHERE hostname = \'' + str(hostname) + '\''
            LOG.info(sql)
            node_info = conn.cursor().execute(sql).fetchone()

            of_id = node_info[0]
        except Exception:
            LOG.exception()
            LOG.info('Can not find of_id')
            return 'fail', pre_stat, reason

        group_rt = SshCommand.onos_ssh_exec(manage_ip, 'groups')

        total_cnt = 0
        gw_list = []
        if group_rt is not None:
            for line in group_rt.splitlines():
                if of_id in line:
                    for col in line.split(','):
                        if 'packets=' in col:
                            packets = int(col.split('=')[1])
                            total_cnt = total_cnt + packets
                            gw_list.append(packets)

        stat_key = node_name + '_GW'
        previous = dict(pre_stat).get(stat_key)
        period = CONF.watchdog()['interval']

        if previous is None or len(previous['gw_list']) != len(gw_list):
            # No baseline yet, or the number of counters changed so the old
            # sample cannot be matched by position: start a new baseline
            # instead of failing with IndexError on every cycle.
            status = '-'
            json_ratio = {'ratio': '-', 'status': status, 'period': period}
        else:
            cur_total = total_cnt - previous['gw_total']
            ratios = []

            for gw, prev_gw in zip(gw_list, previous['gw_list']):
                cur_gw = gw - prev_gw

                LOG.info('cur_gw = ' + str(cur_gw))
                LOG.info('cur_total = ' + str(cur_total))

                if cur_gw == 0 and cur_total == 0:
                    # No traffic at all: treat the load as evenly shared.
                    ratio = 100/len(gw_list)
                elif cur_gw <= 0 or cur_total <= 0:
                    ratio = 0
                else:
                    ratio = float(cur_gw) * 100 / cur_total

                ratios.append(str(ratio))

                if ratio < float(CONF.alarm()['gw_ratio']):
                    status = 'nok'

            str_ratio = ':'.join(ratios)
            json_ratio = {'ratio': str_ratio, 'status': status, 'period': period}
            LOG.info('[COMPUTE] ' + 'GW_RATIO = ' + str_ratio)

        try:
            # NOTE(review): the statement is assembled by string concatenation;
            # switch to bound parameters if DB.sql_execute supports them.
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET gw_ratio = \"' + str(json_ratio) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE TRAFFIC GW INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] TRAFFIC GW DB Update Fail.')
        except Exception:
            LOG.exception()

        pre_stat[stat_key] = {'gw_list': gw_list, 'gw_total': total_cnt}

    except Exception:
        LOG.exception()
        status = 'fail'

    if status != 'ok':
        if json_ratio is None:
            # The failure happened before a report could be built.
            json_ratio = {'ratio': '-', 'status': status}
        reason.append(json_ratio)

    return status, pre_stat, reason
Ejemplo n.º 6
0
def get_gw_ratio_gateway(conn, db_log, node_ip, node_name, rx, gw_rx_sum, pre_stat):
    """Check the share of gateway traffic received by one gateway node.

    The node's data IP and the controller address are looked up in the
    ``nodelist`` stored in ``DB.ONOS_TBL``; the ``packets=`` counters of the
    ONOS ``groups`` lines containing ``{tunnelDst=<data_ip>}`` are summed.
    The increments since the previous sample (kept in ``pre_stat`` under
    ``node_name + '_GW'``) give the ratio ``rx / gw_rx_sum`` in percent,
    which is compared with ``CONF.alarm()['gw_ratio']``.  The node is also
    marked ``'nok'`` when it received fewer packets than the summed group
    counters grew by.  The report is written to the ``gw_ratio`` column of
    ``DB.OPENSTACK_TBL``.

    Args:
        conn: database connection (``cursor().execute(...)`` must be chainable).
        db_log: logger exposing ``write_log``.
        node_ip: management IP used to find the node in ``nodelist``.
        node_name: node whose row is updated and whose baseline is read.
        rx: absolute receive counter of this node.
        gw_rx_sum: absolute receive counter summed by the caller.
        pre_stat: mutable mapping of previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason)``.  ``status`` is ``'ok'``, ``'nok'``,
        ``'fail'`` or ``'-'`` (no previous sample yet).  ``reason`` contains
        the report when ``status`` is not ``'ok'``; it is empty for the early
        ``'fail'`` returns (no controller or data IP found).
    """
    status = 'ok'
    reason = []
    # Stays None until a report exists, so the failure path cannot hit an
    # unbound local.
    json_ratio = None

    try:
        sql = 'SELECT ' + DB.ONOS_TBL + '.nodename, nodelist, ip_addr' + ' FROM ' + DB.ONOS_TBL + \
                ' INNER JOIN ' + DB.NODE_INFO_TBL + ' ON ' + DB.ONOS_TBL + '.nodename = ' + DB.NODE_INFO_TBL + '.nodename'

        nodes_info = conn.cursor().execute(sql).fetchall()

        if len(nodes_info) == 0:
            LOG.info('Fail to load onos list')
            return 'fail', pre_stat, reason

        # search data_ip
        data_ip = ''
        manage_ip = ''
        cpt_to_gw_packet = 0
        for nodename, nodelist, ip in nodes_info:
            if not nodelist == 'none':
                # NOTE(review): eval() executes whatever text is stored in the
                # nodelist column; consider ast.literal_eval if the column only
                # ever holds Python literals.
                for node_info in eval(nodelist):
                    try:
                        if dict(node_info)['management_ip'] == node_ip:
                            manage_ip = ip
                            data_ip = dict(node_info)['data_ip']
                    except Exception:
                        # Malformed entry: forget the partial match and keep looking.
                        manage_ip = ''

                    if not manage_ip == '':
                        break
            if not manage_ip == '':
                break

        if data_ip == '':
            LOG.info('Can not find data ip')
            return 'fail', pre_stat, reason

        group_rt = SshCommand.onos_ssh_exec(manage_ip, 'groups')

        if group_rt is not None:
            tunnel_mark = '{tunnelDst=' + data_ip + '}'
            for line in group_rt.splitlines():
                if tunnel_mark in line:
                    for col in line.split(','):
                        if 'packets=' in col:
                            cpt_to_gw_packet = cpt_to_gw_packet + int(col.split('=')[1])

        stat_key = node_name + '_GW'
        known = dict(pre_stat)
        period = CONF.watchdog()['interval']

        if stat_key not in known:
            status = '-'
            json_ratio = {'current_rx': '-', 'current_compute_tx': '-', 'current_total': '-',
                          'ratio': '-',
                          'period': period, 'status': status, 'packet_loss': False,
                          'description': ''}
        else:
            previous = known[stat_key]
            cur_rx = rx - int(previous['rx'])
            cur_total = gw_rx_sum - int(previous['gw_rx_sum'])
            cur_packet = cpt_to_gw_packet - int(previous['cpt_to_gw_packet'])

            if cur_rx == 0 and cur_total == 0:
                ratio = 100
            elif cur_rx <= 0 or cur_total <= 0:
                # cur_total is the divisor below, so zero must be caught here
                # as well as negative increments.
                ratio = 0
            else:
                ratio = float(cur_rx) * 100 / cur_total

            desc = 'GW RATIO = ' + str(ratio) + ' (' + str(cur_rx) + ' / ' + str(cur_total) + ')'

            loss_flag = cur_rx < cur_packet
            if loss_flag:
                LOG.info('GW Ratio Fail. (Data loss)')

            LOG.info('GW Ratio = ' + str(ratio))

            if ratio < float(CONF.alarm()['gw_ratio']) or loss_flag:
                status = 'nok'

            json_ratio = {'current_rx': cur_rx, 'current_compute_tx': cur_packet, 'current_total': cur_total, 'ratio': format(ratio, '.2f'),
                          'period': period, 'status': status, 'packet_loss': loss_flag, 'description': desc}

        try:
            # NOTE(review): the statement is assembled by string concatenation;
            # switch to bound parameters if DB.sql_execute supports them.
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET gw_ratio = \"' + str(json_ratio) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE TRAFFIC GW INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] TRAFFIC GW DB Update Fail.')
        except Exception:
            LOG.exception()

        pre_stat[stat_key] = {
            'rx': rx,
            'gw_rx_sum': gw_rx_sum,
            'cpt_to_gw_packet': cpt_to_gw_packet,
        }
    except Exception:
        LOG.exception()
        status = 'fail'

    if status != 'ok':
        if json_ratio is None:
            # The failure happened before a report could be built.
            json_ratio = {'current_rx': '-', 'current_compute_tx': '-', 'current_total': '-',
                          'ratio': '-', 'status': status, 'packet_loss': False,
                          'description': ''}
        reason.append(json_ratio)

    return status, pre_stat, reason
Ejemplo n.º 7
0
def controller_traffic_check(conn, db_log, node_name, node_ip, pre_stat):
    """Check the control-message traffic ratio of one ONOS controller.

    The first ``key=value`` field of the ONOS ``summary`` output is used as
    the controller address for ``cpman-stats-list <addr> control_message
    <of_id>``, which is run for every ``(hostname, of_id)`` row of
    ``DB.OPENSTACK_TBL``.  The inbound and outbound counters of all switches
    are summed; their increments since the previous sample (kept in
    ``pre_stat`` under ``node_name``) give the ratio ``out / in`` in percent,
    which is compared with ``CONF.alarm()['controller_traffic_ratio']``.
    The report is written to the ``traffic_stat`` column of ``DB.ONOS_TBL``.

    Args:
        conn: database connection (``cursor().execute(...)`` must be chainable).
        db_log: logger exposing ``write_log``.
        node_name: controller whose row is updated and whose baseline is read.
        node_ip: address used for the ONOS SSH commands.
        pre_stat: mutable mapping of previous samples; updated in place.

    Returns:
        ``(controller_traffic, pre_stat, reason)``.  The status is ``'ok'``,
        ``'nok'``, ``'fail'`` or ``'-'`` (no previous sample, or the inbound
        increment has not exceeded
        ``CONF.alarm()['controller_traffic_minimum_inbound']``).  ``reason``
        collects the entries of switches whose statistics could not be read
        and, when the status is not ``'ok'``, the whole report.
    """
    # Bound before the try so the final return works even if the first SSH
    # call raises.
    controller_traffic = 'ok'
    reason = []

    # cpman message type -> key of the per-switch report entry.
    field_by_type = {
        'INBOUND_PACKET': 'inbound',
        'OUTBOUND_PACKET': 'outbound',
        'FLOW_MOD_PACKET': 'mod',
        'FLOW_REMOVED_PACKET': 'removed',
        'REQUEST_PACKET': 'request',
        'REPLY_PACKET': 'reply',
    }

    try:
        summary_rt = SshCommand.onos_ssh_exec(node_ip, 'summary')

        in_packet = 0
        out_packet = 0

        cpman_stat_list = []

        desc = ''
        ratio = 0

        if summary_rt is not None:
            data_ip = str(summary_rt).split(',')[0].split('=')[1]

            try:
                sql = 'SELECT hostname, of_id FROM ' + DB.OPENSTACK_TBL
                nodes_info = conn.cursor().execute(sql).fetchall()

                for hostname, of_id in nodes_info:
                    cmd = 'cpman-stats-list ' + data_ip + ' control_message ' + of_id

                    stat_rt = SshCommand.onos_ssh_exec(node_ip, cmd)

                    # Fresh placeholders per switch: a message type missing from
                    # the output stays '-' instead of inheriting the value of
                    # the previously processed switch.
                    rest_json = {
                        'hostname': str(hostname),
                        'of_id': str(of_id),
                        'inbound': '-',
                        'outbound': '-',
                        'mod': '-',
                        'removed': '-',
                        'request': '-',
                        'reply': '-'
                    }

                    if stat_rt is None or str(stat_rt).startswith('Failed'):
                        reason.append(rest_json)
                        controller_traffic = 'fail'
                    else:
                        for line in stat_rt.splitlines():
                            cols = line.split(',')
                            msg_type = cols[0].split('=')[1]
                            avg_cnt = int(cols[2].split('=')[1])

                            field = field_by_type.get(msg_type)
                            if field is not None:
                                rest_json[field] = avg_cnt

                            if msg_type == 'INBOUND_PACKET':
                                in_packet = in_packet + avg_cnt
                            elif msg_type == 'OUTBOUND_PACKET':
                                out_packet = out_packet + avg_cnt

                    cpman_stat_list.append(rest_json)

                # Absolute counters become the baseline of a later run.
                for_save_in = in_packet
                for_save_out = out_packet

                known = dict(pre_stat)

                if node_name not in known:
                    controller_traffic = '-'

                    pre_stat[node_name] = {'in_packet': for_save_in, 'out_packet': for_save_out}
                else:
                    previous = known[node_name]
                    in_packet = in_packet - int(previous['in_packet'])
                    out_packet = out_packet - int(previous['out_packet'])

                    # NOTE(review): the threshold is compared without a numeric
                    # cast, unlike the other CONF.alarm() values - confirm it is
                    # stored as a number.
                    if in_packet <= CONF.alarm()['controller_traffic_minimum_inbound']:
                        # Too little traffic to judge; the baseline is left
                        # untouched in this branch.
                        desc = 'Minimum increment for status check = ' + str(
                            CONF.alarm()['controller_traffic_minimum_inbound'])
                        controller_traffic = '-'
                    else:
                        if in_packet == 0 and out_packet == 0:
                            ratio = 100
                        elif in_packet <= 0 or out_packet < 0:
                            LOG.info('Controller Traffic Ratio Fail.')
                            ratio = 0
                        else:
                            ratio = float(out_packet) * 100 / in_packet

                        LOG.info('[CPMAN][' + node_name +
                                 '] Controller Traffic Ratio = ' + str(ratio) +
                                 '(' + str(out_packet) + '/' + str(in_packet) +
                                 ')')
                        desc = 'Controller Traffic Ratio = ' + str(
                            ratio) + '(' + str(out_packet) + '/' + str(
                                in_packet) + ')\n'

                        if ratio < float(
                                CONF.alarm()['controller_traffic_ratio']):
                            controller_traffic = 'nok'

                        pre_stat[node_name] = {'in_packet': for_save_in, 'out_packet': for_save_out}
            except Exception:
                LOG.exception()
                controller_traffic = 'fail'
        else:
            controller_traffic = 'fail'

        controller_json = {
            'status': controller_traffic,
            'stat_list': cpman_stat_list,
            'minimum_inbound_packet': CONF.alarm()['controller_traffic_minimum_inbound'],
            'current_inbound_packet': in_packet,
            'current_outbound_packet': out_packet,
            'period': CONF.watchdog()['interval'],
            'ratio': format(ratio, '.2f'),
            'description': desc,
            'threshold': CONF.alarm()['controller_traffic_ratio']
        }

        if controller_traffic != 'ok':
            reason.append(controller_json)

        try:
            # NOTE(review): the statement is assembled by string concatenation;
            # switch to bound parameters if DB.sql_execute supports them.
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET traffic_stat = \"' + str(controller_json) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE CONTROLLER TRAFFIC INFO -----\n' +
                             sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] CONTROLLER TRAFFIC Update Fail.')
        except Exception:
            LOG.exception()
    except Exception:
        LOG.exception()
        controller_traffic = 'fail'

    return controller_traffic, pre_stat, reason
Ejemplo n.º 8
0
def _grade_resource(conn, db_log, node_name, node_type, node_info, item, value):
    """Grade one resource item (CPU/MEMORY/DISK) and raise an event on change.

    Args:
        conn: DB connection handed through to ``alarm_event.occur_event``.
        db_log: DB logger handed through to ``alarm_event.occur_event``.
        node_name: Name of the node being graded.
        node_type: Node type, used to ask whether the item is monitored.
        node_info: Mapping of item name to the currently stored grade for
            this node.
        item: Upper-case item name ('CPU', 'MEMORY' or 'DISK'); its
            lower-case form is the key looked up in ``CONF.alarm()``.
        value: Measured usage value passed to ``alarm_event.get_grade``.

    Returns:
        'fail' when the item has no entry in ``CONF.alarm()``, '-' when the
        item is not monitored for this node type, otherwise the grade
        returned by ``alarm_event.get_grade``.
    """
    grade = 'fail'
    reason = []
    conf_key = item.lower()

    if conf_key in CONF.alarm():
        if not alarm_event.is_monitor_item(node_type, item):
            grade = '-'
        else:
            grade = alarm_event.get_grade(conf_key, value)
            if node_info[item] != grade:
                reason.append({'value': value})
                alarm_event.occur_event(conn, db_log, node_name, item, node_info[item], grade, reason)
        LOG.info('[' + node_name + '][' + item + '][' + grade + ']' + str(reason))

    return grade


def periodic(conn, pre_stat, db_log):
    """Run one watchdog cycle over every registered node.

    For each node the function pings it, runs the checks matching the node
    type (ONOS / HA / XOS / OPENSTACK), grades CPU, memory and disk usage,
    feeds every result through ``alarm_event`` and finally writes the whole
    status row to ``DB.STATUS_TBL``.

    Args:
        conn: DB connection used for reading current grades and for updates.
        pre_stat: Statistics carried over from the previous cycle; it is
            passed to and replaced by the traffic check helpers.
        db_log: Logger object exposing ``write_log`` for SQL statements.

    Returns:
        ``pre_stat`` as last returned by the traffic check helpers. It is
        returned on every path, including the early exits, so a caller that
        reassigns its state from the return value never loses it.
    """
    try:
        cur_info = {}
        LOG.info('Periodic checking %s', str(CONF.watchdog()['check_system']))

        try:
            node_list = cmd_proc.get_node_list('all', 'nodename, ip_addr, username, type, sub_type')

            if not node_list:
                LOG.info("Not Exist Node data ...")
                return pre_stat
        except Exception:
            LOG.exception()
            return pre_stat

        # Read cur alarm status
        sql = 'SELECT nodename, item, grade FROM ' + DB.EVENT_TBL

        db_log.write_log(sql)
        cur_grade = conn.cursor().execute(sql).fetchall()

        for nodename, item, grade in cur_grade:
            cur_info.setdefault(nodename, {})[item] = grade

        # check HA, once
        # Defaults keep the HA branch below from hitting unbound names (and
        # aborting the whole cycle) when 'HA' is not in check_system.
        global_ha_svc = 'fail'
        global_ha_ratio = 'fail'
        global_svc_reason = []
        global_ha_ratio_reason = []

        if 'HA' in CONF.watchdog()['check_system']:
            ha_dic = chk_ha.onos_ha_check(conn, db_log)
            global_ha_svc, global_ha_ratio, global_svc_reason, global_ha_ratio_reason = chk_ha.get_ha_stats(ha_dic)

        # check GW ratio
        gw_total = 0

        # check node traffic
        rx_total = 0
        tx_total = 0

        openstack_rx_dic = dict()
        openstack_tx_dic = dict()
        rx_tx_err_info = dict()
        patch_tx_dic = dict()

        # First pass: collect rx/tx counters of every OPENSTACK node so the
        # per-node checks below can work with cluster-wide totals.
        for node_name, node_ip, user_name, node_type, sub_type in node_list:
            if node_type.upper() == 'OPENSTACK':
                openstack_rx_dic[node_name], openstack_tx_dic[node_name], rx_tx_err_info[node_name], patch_tx_dic[node_name] = chk_openstack.rx_tx_check(user_name, node_ip)

                if openstack_rx_dic[node_name] > 0:
                    rx_total = rx_total + openstack_rx_dic[node_name]

                if openstack_tx_dic[node_name] > 0:
                    tx_total = tx_total + openstack_tx_dic[node_name]

                # Case-insensitive, matching the GATEWAY test in the second pass.
                if sub_type.upper() == 'GATEWAY':
                    if openstack_rx_dic[node_name] > 0:
                        gw_total = gw_total + openstack_rx_dic[node_name]

        # Second pass: run the checks for each node and store the result.
        for node_name, node_ip, user_name, node_type, sub_type in node_list:
            LOG.info('------------------------------------ ' + node_name + ' START ------------------------------------')

            # Every item starts as failed/unknown; only the checks that apply
            # to this node type overwrite its own items.
            cpu = '-1'
            memory = '-1'
            disk = '-1'

            onos_app = 'fail'
            onos_rest = 'fail'

            v_router = 'fail'

            xos_status = 'fail'
            synchronizer_status = 'fail'

            swarm_node = 'fail'
            swarm_svc = 'fail'

            ha_svc = 'fail'
            ha_ratio = 'fail'

            openstack_node = 'fail'

            onos_of = 'fail'
            onos_cluster = 'fail'

            traffic_gw = 'fail'
            port_stat_vxlan = 'fail'
            traffic_controller = 'fail'
            traffic_internal = 'fail'

            # check ping
            network = net_check(node_ip)

            # occur event (rest)
            # 1. ping check
            reason = []
            if network == 'nok':
                reason.append('ping transmit failed')

            network = alarm_event.process_event(conn, db_log, node_name, node_type, 'NETWORK', cur_info[node_name]['NETWORK'], network, reason)

            # 2. type specific checks, only when the node is reachable
            if network == 'ok':
                if node_type.upper() == 'ONOS':
                    # check node
                    openstack_node, reason = chk_onos.onos_node_check(conn, db_log, node_name, node_ip)
                    openstack_node = alarm_event.process_event(conn, db_log, node_name, node_type, 'OPENSTACK_NODE',
                                                               cur_info[node_name]['OPENSTACK_NODE'], openstack_node, reason)
                    LOG.info('[' + node_name + '][OPENSTACK_NODE][' + openstack_node + ']' + str(reason))

                    # check app
                    onos_app, reason = chk_onos.onos_app_check(conn, db_log, node_name, node_ip)
                    onos_app = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_APP',
                                                         cur_info[node_name]['ONOS_APP'], onos_app, reason)
                    LOG.info('[' + node_name + '][ONOS_APP][' + onos_app + ']' + str(reason))

                    # check connection
                    onos_of, onos_cluster, of_reason, cluster_reason = chk_onos.onos_conn_check(conn, db_log, node_name, node_ip)
                    onos_of = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_OPENFLOW',
                                                        cur_info[node_name]['ONOS_OPENFLOW'], onos_of, of_reason)
                    onos_cluster = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_CLUSTER',
                                                             cur_info[node_name]['ONOS_CLUSTER'], onos_cluster, cluster_reason)
                    LOG.info('[' + node_name + '][ONOS_OPENFLOW][' + onos_of + ']' + str(of_reason))
                    LOG.info('[' + node_name + '][ONOS_CLUSTER][' + onos_cluster + ']' + str(cluster_reason))

                    # check web
                    onos_rest, reason = chk_onos.onos_rest_check(conn, db_log, node_name, node_ip)
                    onos_rest = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_REST',
                                                          cur_info[node_name]['ONOS_REST'], onos_rest, reason)
                    LOG.info('[' + node_name + '][ONOS_REST][' + onos_rest + ']' + str(reason))

                    # check controller traffic
                    traffic_controller, pre_stat, reason = chk_onos.controller_traffic_check(conn, db_log, node_name, node_ip, pre_stat)
                    traffic_controller = alarm_event.process_event(conn, db_log, node_name, node_type, 'TRAFFIC_CONTROLLER',
                                                                   cur_info[node_name]['TRAFFIC_CONTROLLER'],
                                                                   traffic_controller, reason)
                    LOG.info('[' + node_name + '][ONOS_TRAFFIC_CONTROLLER][' + traffic_controller + ']' + str(reason))

                elif node_type.upper() == 'HA':
                    # HA results were computed once before the loop and are
                    # reported against every HA node.
                    ha_svc = alarm_event.process_event(conn, db_log, node_name, node_type, 'HA_SVC', cur_info[node_name]['HA_SVC'],
                                                       global_ha_svc, global_svc_reason)
                    LOG.info('[' + node_name + '][HA_SVC][' + ha_svc + ']' + str(global_svc_reason))

                    ha_ratio = alarm_event.process_event(conn, db_log, node_name, node_type, 'HA_RATIO', cur_info[node_name]['HA_RATIO'],
                                                         global_ha_ratio, global_ha_ratio_reason)
                    LOG.info('[' + node_name + '][HA_RATIO][' + ha_ratio + ']' + str(global_ha_ratio_reason))

                # check xos (status/synchronizer)
                elif node_type.upper() == 'XOS':
                    xos_status, reason = chk_xos.xos_status_check(conn, db_log, node_name)
                    xos_status = alarm_event.process_event(conn, db_log, node_name, node_type, 'XOS_SVC',
                                                           cur_info[node_name]['XOS_SVC'], xos_status, reason)
                    LOG.info('[' + node_name + '][XOS_SVC][' + xos_status + ']' + str(reason))

                    synchronizer_status, reason = chk_xos.xos_sync_check(conn, db_log, node_name)
                    synchronizer_status = alarm_event.process_event(conn, db_log, node_name, node_type, 'SYNCHRONIZER',
                                                                    cur_info[node_name]['SYNCHRONIZER'], synchronizer_status, reason)
                    LOG.info('[' + node_name + '][SYNCHRONIZER][' + synchronizer_status + ']' + str(reason))

                    # check swarm (app/node)
                    swarm_manager = chk_swarm.find_swarm_manager()

                    swarm_node, reason = chk_swarm.swarm_node_check(conn, db_log, node_name, user_name, node_ip, swarm_manager)
                    swarm_node = alarm_event.process_event(conn, db_log, node_name, node_type, 'SWARM_NODE',
                                                           cur_info[node_name]['SWARM_NODE'], swarm_node, reason)
                    LOG.info('[' + node_name + '][SWARM_NODE][' + swarm_node + ']' + str(reason))

                    swarm_svc, reason = chk_swarm.swarm_service_check(conn, db_log, node_name, user_name, node_ip,
                                                                      swarm_manager)
                    swarm_svc = alarm_event.process_event(conn, db_log, node_name, node_type, 'SWARM_SVC',
                                                          cur_info[node_name]['SWARM_SVC'], swarm_svc, reason)
                    LOG.info('[' + node_name + '][SWARM_SVC][' + swarm_svc + ']' + str(reason))

                # check vrouter, gw_ratio
                elif node_type.upper() == 'OPENSTACK':
                    port_stat_vxlan, pre_stat, reason = chk_openstack.get_node_traffic(conn, db_log, node_name, openstack_rx_dic,
                                                                                       openstack_tx_dic, rx_total, tx_total,
                                                                                       rx_tx_err_info[node_name], pre_stat)
                    port_stat_vxlan = alarm_event.process_event(conn, db_log, node_name, node_type, 'PORT_STAT_VXLAN',
                                                                cur_info[node_name]['PORT_STAT_VXLAN'], port_stat_vxlan, reason)
                    LOG.info('[' + node_name + '][PORT_STAT_VXLAN][' + port_stat_vxlan + ']' + str(reason))

                    traffic_internal, pre_stat, reason = chk_openstack.get_internal_traffic(conn, db_log, node_name, node_ip, user_name, sub_type,
                                                                                            openstack_rx_dic[node_name], patch_tx_dic[node_name], pre_stat)
                    traffic_internal = alarm_event.process_event(conn, db_log, node_name, node_type, 'TRAFFIC_INTERNAL',
                                                                 cur_info[node_name]['TRAFFIC_INTERNAL'],
                                                                 traffic_internal, reason)
                    LOG.info('[' + node_name + '][TRAFFIC_INTERNAL][' + traffic_internal + ']' + str(reason))

                    if sub_type.upper() == 'GATEWAY':
                        v_router, reason = chk_openstack.vrouter_check(conn, db_log, node_name, user_name, node_ip)
                        v_router = alarm_event.process_event(conn, db_log, node_name, node_type, 'GATEWAY',
                                                             cur_info[node_name]['GATEWAY'], v_router, reason)
                        LOG.info('[' + node_name + '][GATEWAY][' + v_router + ']' + str(reason))

                        traffic_gw, pre_stat, reason = chk_openstack.get_gw_ratio_gateway(conn, db_log, node_ip, node_name, openstack_rx_dic[node_name], gw_total, pre_stat)

                    elif sub_type.upper() == 'COMPUTE':
                        v_router = '-'
                        traffic_gw, pre_stat, reason = chk_openstack.get_gw_ratio_compute(conn, db_log, node_ip, node_name, pre_stat)

                    else:
                        # No GW ratio check ran: do not report the
                        # TRAFFIC_INTERNAL reason against TRAFFIC_GW.
                        reason = []

                    traffic_gw = alarm_event.process_event(conn, db_log, node_name, node_type, 'TRAFFIC_GW',
                                                           cur_info[node_name]['TRAFFIC_GW'], traffic_gw, reason)
                    LOG.info('[' + node_name + '][TRAFFIC_GW][' + traffic_gw + ']' + str(reason))

                # check resource
                cpu, memory, disk = chk_resource.check_resource(conn, db_log, node_name, user_name, node_ip)

            # 3. resource check (CPU/MEM/DISK)
            node_info = cur_info[node_name]
            cpu_grade = _grade_resource(conn, db_log, node_name, node_type, node_info, 'CPU', cpu)
            mem_grade = _grade_resource(conn, db_log, node_name, node_type, node_info, 'MEMORY', memory)
            disk_grade = _grade_resource(conn, db_log, node_name, node_type, node_info, 'DISK', disk)

            # A failed status update is logged but must not stop the checks
            # for the remaining nodes.
            try:
                sql = 'UPDATE ' + DB.STATUS_TBL + \
                      ' SET CPU = \'' + cpu_grade + '\',' + \
                      ' MEMORY = \'' + mem_grade + '\',' + \
                      ' DISK = \'' + disk_grade + '\',' + \
                      ' NETWORK = \'' + network + '\',' + \
                      ' ONOS_APP = \'' + onos_app + '\',' + \
                      ' ONOS_REST = \'' + onos_rest + '\',' + \
                      ' ONOS_OPENFLOW = \'' + onos_of + '\',' + \
                      ' ONOS_CLUSTER = \'' + onos_cluster + '\',' + \
                      ' XOS_SVC = \'' + xos_status + '\',' + \
                      ' SYNCHRONIZER = \'' + synchronizer_status + '\',' + \
                      ' SWARM_NODE = \'' + swarm_node + '\',' + \
                      ' OPENSTACK_NODE = \'' + openstack_node + '\',' + \
                      ' SWARM_SVC = \'' + swarm_svc + '\',' + \
                      ' GATEWAY = \'' + v_router + '\',' + \
                      ' HA_SVC = \'' + ha_svc + '\',' + \
                      ' HA_RATIO = \'' + ha_ratio + '\',' + \
                      ' TRAFFIC_GW = \'' + traffic_gw + '\',' + \
                      ' PORT_STAT_VXLAN = \'' + port_stat_vxlan + '\',' + \
                      ' TRAFFIC_CONTROLLER = \'' + traffic_controller + '\',' + \
                      ' TRAFFIC_INTERNAL = \'' + traffic_internal + '\',' + \
                      ' time = \'' + str(datetime.now()) + '\'' + \
                      ' WHERE nodename = \'' + node_name + '\''
                db_log.write_log('----- UPDATE TOTAL SYSTEM INFO -----\n' + sql)

                if DB.sql_execute(sql, conn) != 'SUCCESS':
                    db_log.write_log('[FAIL] TOTAL SYSTEM INFO DB Update Fail.')
            except Exception:
                LOG.exception()
    except Exception:
        LOG.exception()

    return pre_stat
Ejemplo n.º 9
0
def periodic(conn):
    """Run one watchdog cycle: ping, app and resource checks for every node.

    For each node returned by ``cmd_proc.get_node_list`` the function pings
    it, runs the application check selected by which configured list the
    node IP appears in, reads CPU / memory / disk usage, stores the raw
    usage in ``DB.RESOURCE_TBL``, raises events for every item whose grade
    changed and finally stores the grades in ``DB.STATUS_TBL``.

    Args:
        conn: DB connection used for reading current grades and for updates.

    Returns:
        None.
    """
    cur_info = {}
    LOG.info("Periodic checking...%s", str(CONF.watchdog()['check_system']))

    try:
        node_list = cmd_proc.get_node_list('all',
                                           'nodename, ip_addr, username')

        if not node_list:
            LOG.info("Not Exist Node data ...")
            return
    except Exception:
        LOG.exception()
        return

    # Read cur alarm status
    sql = 'SELECT nodename, item, grade FROM ' + DB.EVENT_TBL
    LOG.info(sql)
    cur_grade = conn.cursor().execute(sql).fetchall()

    for nodename, item, grade in cur_grade:
        cur_info.setdefault(nodename, {})[item] = grade

    for node_name, node_ip, user_name in node_list:
        ping = net_check(node_ip)

        # Defaults reported when the node is unreachable.
        app = 'fail'
        cpu = '-1'
        mem = '-1'
        disk = '-1'

        if ping == 'ok':
            # NOTE(review): these are substring tests against the stringified
            # list, so one IP can match another that merely contains it
            # (e.g. 10.0.0.1 vs 10.0.0.10) - confirm the config format.
            if node_ip in str(CONF.onos()['list']):
                app = onos_app_check(node_ip)
            elif node_ip in str(CONF.xos()['list']):
                app = xos_app_check(node_ip)
            elif node_ip in str(CONF.swarm()['list']):
                app = swarm_app_check(node_ip)
            elif node_ip in str(CONF.openstack()['list']):
                app = openstack_app_check(node_ip)

            cpu = str(resource.get_cpu_usage(user_name, node_ip, True))
            mem = str(resource.get_mem_usage(user_name, node_ip, True))
            disk = str(resource.get_disk_usage(user_name, node_ip, True))

        # A failed resource update is logged but does not stop the cycle.
        try:
            sql = 'UPDATE ' + DB.RESOURCE_TBL + \
                  ' SET cpu = \'' + cpu + '\',' + \
                  ' memory = \'' + mem + '\',' + \
                  ' disk = \'' + disk + '\'' + \
                  ' WHERE nodename = \'' + node_name + '\''
            LOG.info('Update Resource info = ' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                LOG.error('DB Update Fail.')
        except Exception:
            LOG.exception()

        # occur event (rest)
        # 1. ping check
        if cur_info[node_name]['ping'] != ping:
            occur_event(conn, node_name, 'ping', cur_info[node_name]['ping'],
                        ping)

        # 2. app check
        if cur_info[node_name]['app'] != app:
            occur_event(conn, node_name, 'app', cur_info[node_name]['app'],
                        app)

        # 3. resource check (CPU/MEM/DISK)
        # An item without an alarm configuration keeps the grade 'fail'.
        usage = {'cpu': cpu, 'memory': mem, 'disk': disk}
        grades = {}
        for res_item in ('cpu', 'memory', 'disk'):
            grades[res_item] = 'fail'
            if res_item in CONF.alarm():
                grades[res_item] = get_grade(res_item, usage[res_item])
                if cur_info[node_name][res_item] != grades[res_item]:
                    occur_event(conn, node_name, res_item,
                                cur_info[node_name][res_item], grades[res_item])

        # A failed status update is logged but does not stop the cycle.
        try:
            sql = 'UPDATE ' + DB.STATUS_TBL + \
                  ' SET cpu = \'' + grades['cpu'] + '\',' + \
                  ' memory = \'' + grades['memory'] + '\',' + \
                  ' disk = \'' + grades['disk'] + '\',' + \
                  ' ping = \'' + ping + '\',' + \
                  ' app = \'' + app + '\',' + \
                  ' time = \'' + str(datetime.now()) + '\'' + \
                  ' WHERE nodename = \'' + node_name + '\''
            LOG.info('Update Status info = ' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                LOG.error('DB Update Fail.')
        except Exception:
            LOG.exception()