Example #1
0
def onos_api_req(node_ip, url_path):
    """Issue a GET request against the ONOS REST API of one node.

    The port, credentials and timeout are read from ``CONF.onos()``
    (``api_port``, ``api_user_passwd`` as ``user:password``,
    ``api_timeout_sec``).

    :param node_ip: address of the ONOS node to query.
    :param url_path: path appended to ``http://<node_ip>:<api_port>/``.
    :return: ``(status, body)``:
             ``(200, <decoded JSON>)`` on success,
             ``(-1, None)`` when the request could not be built or sent,
             ``(-2, None)`` on a non-200 reply or an undecodable body.
    """
    try:
        onos_conf = CONF.onos()
        url = "http://%s:%d/%s" % (node_ip, onos_conf['api_port'], url_path)
        # Split on the first ':' only, so a password that itself contains
        # ':' is passed through intact.
        user, passwd = onos_conf['api_user_passwd'].split(':', 1)
        timeout = onos_conf['api_timeout_sec']

        rsp = requests.get(url, auth=(user, passwd), timeout=timeout)

    except Exception:
        # Any failure while preparing or sending the request (bad config,
        # connection error, timeout, ...).
        LOG.exception()
        return -1, None

    if rsp.status_code != 200:
        return -2, None

    try:
        # Single quotes are rewritten to double quotes before decoding.
        body = json.loads(rsp.content.replace("\'", '"'))
        return rsp.status_code, body

    except Exception:
        LOG.exception()
        return -2, None
Example #2
0
def get_service_list():
    """Collect the instance names reported by the XOS REST API.

    Runs ``curl`` against ``<xos_rest_server>/api/core/instances/`` with the
    account from ``CONF.xos()`` and reads ``instance_name`` from every entry
    of the JSON reply.

    :return: list of instance names; ``''`` when curl exits non-zero; the
             names gathered so far (possibly none) when an exception occurs.
    """
    names = []

    try:
        rest_server = CONF.xos()['xos_rest_server']
        rest_account = CONF.xos()['xos_rest_account']

        curl_cmd = 'curl -H "Accept: application/json; indent=4" -u ' + rest_account + ' -X GET ' + rest_server + '/api/core/instances/'
        proc = Popen(curl_cmd, stdout=PIPE, stderr=PIPE, shell=True)
        stdout_data, stderr_data = proc.communicate()

        if proc.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", stderr_data)
            return ''

        for entry in json.loads(stdout_data):
            instance_name = entry['instance_name']
            LOG.info('swarm_instance_name = ' + instance_name)
            names.append(instance_name)

    except:
        LOG.exception()

    return names
Example #3
0
def xos_status_check(conn, db_log, node_name):
    """Query the XOS REST API for backend status and record it in the DB.

    Each entry of ``<xos_rest_server>/api/core/xoses/`` carries a
    ``backend_status`` string that is split on ``'-'``: the first part is
    the code (``'0'`` means ok) and the second part is used as description.

    :param conn: DB connection handed to ``DB.sql_execute``.
    :param db_log: logger object exposing ``write_log``.
    :param node_name: node whose row in ``DB.XOS_TBL`` is updated.
    :return: ``(status, fail_reason)`` where status is ``'ok'``, ``'nok'``
             or ``'fail'``; ``('fail', None)`` when curl exits non-zero.
    """
    overall = 'ok'
    collected = []
    failures = []

    try:
        rest_server = CONF.xos()['xos_rest_server']
        rest_account = CONF.xos()['xos_rest_account']

        curl_cmd = 'curl -H "Accept: application/json; indent=4" -u ' + rest_account + ' -X GET ' + rest_server + '/api/core/xoses/'
        proc = Popen(curl_cmd, stdout=PIPE, stderr=PIPE, shell=True)
        stdout_data, stderr_data = proc.communicate()

        if proc.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", stderr_data)
            return 'fail', None

        for xos_info in json.loads(stdout_data):
            backend_status = xos_info['backend_status']
            LOG.info('xos_status_backend_status = ' + backend_status)

            parts = str(backend_status).split('-')
            status = 'ok' if parts[0].strip() == '0' else 'nok'

            entry = {
                'name': xos_info['name'],
                'status': status,
                'description': parts[1].strip()
            }
            collected.append(entry)

            if status == 'nok':
                overall = 'nok'
                failures.append(entry)

            # NOTE(review): the row is rewritten on every iteration with the
            # list accumulated so far; nothing is written for an empty reply.
            try:
                sql = 'UPDATE ' + DB.XOS_TBL + \
                      ' SET xos_status = \"' + str(collected) + '\"' + \
                      ' WHERE nodename = \'' + node_name + '\''
                db_log.write_log('----- UPDATE XOS STATUS INFO -----\n' + sql)

                update_result = DB.sql_execute(sql, conn)
                if update_result != 'SUCCESS':
                    db_log.write_log('[FAIL] XOS STATUS DB Update Fail.')
            except:
                LOG.exception()

    except:
        LOG.exception()
        overall = 'fail'

    return overall, failures
Example #4
0
def onos_ha_check(conn, db_log):
    """Fetch the HA proxy statistics, store them in the DB and return them.

    Runs ``curl`` against ``CONF.ha()['ha_proxy_server']``, parses the reply
    as CSV (after stripping leading ``'#'``/space characters) and groups the
    rows by proxy name (``pxname``).  Rows of the ``stats`` proxy and
    ``BACKEND`` rows are skipped.

    :param conn: DB connection handed to ``DB.sql_execute``.
    :param db_log: logger object exposing ``write_log``.
    :return: dict mapping proxy name to a list of
             ``{'name', 'req_count', 'succ_count', 'node_sts'}`` dicts, or
             ``None`` when curl fails or an error occurs.
    """
    try:
        ha_conf = CONF.ha()
        stats_url = ha_conf['ha_proxy_server']
        account = ha_conf['ha_proxy_account']

        cmd = 'curl --user ' + account + ' --header \'Accept: text/html, application/xhtml+xml, image/jxr, */*\' \"' + stats_url + '\"'
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = proc.communicate()

        if proc.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return None

        report_data = csv.DictReader(output.lstrip('# ').splitlines())

        dic_stat = dict()
        for row in report_data:
            if row['pxname'].strip() == 'stats' or row['svname'].strip() == 'BACKEND':
                continue

            detail = {
                'name': row['svname'],
                'req_count': row['stot'],
                'succ_count': row['hrsp_2xx'],
                'node_sts': row['status']
            }

            # setdefault replaces the deprecated has_key() check and the
            # duplicated append in both branches.
            dic_stat.setdefault(row['pxname'], []).append(detail)

        # A failed DB update is logged but does not invalidate the result.
        try:
            sql = 'UPDATE ' + DB.HA_TBL + \
                  ' SET stats = \"' + str(dic_stat) + '\"' + \
                  ' WHERE ha_key = \"' + 'HA' + '\"'
            db_log.write_log('----- UPDATE HA INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] HA DB Update Fail.')
        except Exception:
            LOG.exception()

        return dic_stat
    except Exception:
        LOG.exception()
        return None
Example #5
0
    def authentication(self):
        """Validate the ``Authorization`` header of the current request.

        The last whitespace-separated token of the header is base64-decoded
        and compared with every entry of ``CONF.rest()['user_password']``
        (``id:password`` strings).  On failure an explanatory line is written
        to ``self.wfile``.

        Only the user id (the part before the first ``':'``) is logged; the
        password is never written to the log.

        :return: True when the credentials match a configured entry,
                 otherwise False.
        """
        try:
            request_auth = self.headers.getheader("authorization")
            if not request_auth:
                self.wfile.write('No Authorization Header\n')
                return False

            id_pw_list = CONF.rest()['user_password']

            # Decode once; None marks a header that could not be decoded.
            request_account = None
            try:
                request_account = base64.b64decode(str(request_auth).split()[-1])

                for id_pw in id_pw_list:
                    if id_pw.strip() == request_account:
                        LOG.info('[REST-SERVER] AUTH SUCCESS = %s, from %s',
                                 id_pw.strip().split(':', 1)[0], self.client_address)
                        return True
            except Exception:
                LOG.exception()

            self.wfile.write('Request Authentication User ID or Password is Wrong \n')

            if request_account is None:
                failed_user = '<undecodable>'
            else:
                failed_user = request_account.split(':', 1)[0]
            LOG.info('[REST-SERVER] AUTH FAIL = %s, from %s',
                     failed_user, self.client_address)
            return False

        except Exception:
            LOG.exception()
            return False
Example #6
0
def proc_dis_system(node, dummy):
    """Return the monitored status columns of the watched systems.

    For every system type in ``CONF.watchdog()['check_system']`` the status
    table is joined with the node-info table and the columns returned by
    ``DB.get_event_list(sys_type)`` are selected.

    :param node: node name to restrict the query to, or ``'all'``.
    :param dummy: unused.
    :return: dict mapping node name to
             ``{'TYPE': <type>, 'IP': <ip_addr>, <event>: <value>, ...}``,
             or ``{'Result': 'FAIL'}`` when anything goes wrong.
    """
    try:
        result = dict()

        for sys_type in CONF.watchdog()['check_system']:
            event_list = DB.get_event_list(sys_type)

            # Values are bound as parameters instead of being concatenated
            # into the statement (qmark style; assumes DB.connection()
            # returns a sqlite3 connection, as the chained
            # cursor().execute().fetchall() suggests).
            sql = 'SELECT ' + DB.STATUS_TBL + '.nodename, ' + DB.NODE_INFO_TBL + '.ip_addr, ' + ", ".join(event_list) + ' FROM ' + DB.STATUS_TBL + \
                  ' INNER JOIN ' + DB.NODE_INFO_TBL + ' ON ' + DB.STATUS_TBL + '.nodename = ' + DB.NODE_INFO_TBL + '.nodename WHERE type = ?'
            params = [sys_type]

            if node != 'all':
                sql = sql + ' and ' + DB.NODE_INFO_TBL + '.nodename = ?'
                params.append(node)

            conn = DB.connection()
            try:
                with conn:
                    nodes_info = conn.cursor().execute(sql, tuple(params)).fetchall()
            finally:
                # Close even when the query raises, so the handle is not leaked.
                conn.close()

            for row in nodes_info:
                line = dict()
                line['TYPE'] = sys_type
                line['IP'] = row[1]
                # Event columns start at index 2, after nodename and ip_addr.
                for idx, item in enumerate(event_list, 2):
                    line[item] = row[idx]

                result[row[0]] = line

        return result
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
Example #7
0
def run():
    """Start the REST HTTP server and serve requests until it stops.

    Listens on all interfaces at ``CONF.rest()['rest_server_port']`` with
    ``RestHandler`` as request handler.  Any failure is printed and logged.
    """
    try:
        listen_port = int(CONF.rest()['rest_server_port'])
        rest_server = HTTPServer(("", listen_port), RestHandler)
        rest_server.serve_forever()
    except:
        print('Rest Server failed to start')
        LOG.exception()
Example #8
0
def net_check(node):
    """Check reachability of *node* with a single ping.

    Only acts when ``CONF.watchdog()['method']`` is ``'ping'``.

    :param node: host name or address handed to ``ping``.
    :return: ``'ok'`` when ping exits with 0, ``'nok'`` otherwise;
             ``None`` when the configured method is not ``'ping'``.
    """
    if CONF.watchdog()['method'] != 'ping':
        return None

    wait = CONF.watchdog()['timeout']
    if sys.platform == 'darwin':
        # presumably macOS ping takes -W in milliseconds -- TODO confirm
        wait = wait * 1000

    ping_cmd = 'ping -c1 -W%d -n %s' % (wait, node)

    proc = Popen(ping_cmd, stdout=PIPE, stderr=PIPE, shell=True)
    proc.communicate()

    if proc.returncode == 0:
        return 'ok'

    LOG.error("\'%s\' Network Check Error(%d) ", node, proc.returncode)
    return 'nok'
Example #9
0
    def auth_pw(self, cli_pw):
        """Check base64-encoded ``id:password`` credentials.

        The decoded value is compared with every entry of
        ``CONF.rest()['user_password']``.  Only the user id (the part before
        the first ``':'``) is logged; the password is never written to the
        log.

        :param cli_pw: base64-encoded credential string.
        :return: True when the credentials match a configured entry, False
                 otherwise (including when *cli_pw* is not valid base64).
        """
        id_pw_list = CONF.rest()['user_password']

        try:
            account = base64.b64decode(cli_pw)
        except (TypeError, ValueError):
            # TypeError on Python 2, binascii.Error (a ValueError) on Python 3.
            LOG.info('[REST-SERVER] AUTH FAIL = <undecodable credentials>')
            return False

        for id_pw in id_pw_list:
            if id_pw.strip() == account:
                LOG.info('[REST-SERVER] AUTH SUCCESS = ' + id_pw.strip().split(':', 1)[0])
                return True

        LOG.info('[REST-SERVER] AUTH FAIL = ' + account.split(':', 1)[0])
        return False
Example #10
0
def find_swarm_manager():
    """Look up the swarm manager host from the XOS controller list.

    Runs ``curl`` against ``<xos_rest_server>/api/core/controllers/`` and
    takes the ``auth_url`` of the first controller entry; the text before
    its first ``':'`` is returned.

    :return: that leading part of ``auth_url``, or ``''`` when curl fails,
             the list is empty or an exception occurs.
    """
    manager_host = ''

    try:
        rest_server = CONF.xos()['xos_rest_server']
        rest_account = CONF.xos()['xos_rest_account']

        curl_cmd = 'curl -H "Accept: application/json; indent=4" -u ' + rest_account + ' -X GET ' + rest_server + '/api/core/controllers/'
        proc = Popen(curl_cmd, stdout=PIPE, stderr=PIPE, shell=True)
        stdout_data, stderr_data = proc.communicate()

        if proc.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", stderr_data)
            return ''

        # Only the first controller entry is inspected.
        for controller_info in json.loads(stdout_data):
            auth_url = controller_info['auth_url']

            LOG.info('swarm_manager_auth_url = ' + auth_url)

            manager_host = str(auth_url).split(':')[0]
            break
    except:
        LOG.exception()

    return manager_host
Example #11
0
def get_ha_stats(ha_dic):
    """Evaluate HA proxy statistics for node health and success ratio.

    A ``FRONTEND`` entry is healthy when its ``node_sts`` contains ``OPEN``;
    every other entry is healthy when it contains ``UP``.  The ratio is the
    summed ``succ_count`` of the non-frontend entries, in percent of the
    ``req_count`` of the last frontend entry seen, and is compared with
    ``CONF.alarm()['ha_proxy']``.

    :param ha_dic: mapping of proxy name to a list of entry dicts with the
                   keys ``name``, ``node_sts``, ``req_count``, ``succ_count``.
    :return: ``(ha_status, ha_ratio, list_reason, ratio_reason)``; both
             status values become ``'fail'`` when an exception occurs.
    """
    try:
        ha_status = 'ok'
        ha_ratio = 'ok'

        list_reason = []
        ratio_reason = []

        frontend = 0
        backend = 0

        for key in dict(ha_dic).keys():
            for line in ha_dic[key]:
                entry = dict(line)
                host = entry['name']
                node_state = entry['node_sts']

                is_frontend = host == 'FRONTEND'
                expected_token = 'OPEN' if is_frontend else 'UP'

                if expected_token not in node_state:
                    list_reason.append({
                        'key': key,
                        'hostname': host,
                        'status': 'nok'
                    })
                    ha_status = 'nok'

                if is_frontend:
                    frontend = int(entry['req_count'])
                else:
                    backend = backend + int(entry['succ_count'])

        ratio = float(backend) * 100 / frontend

        if ratio < float(CONF.alarm()['ha_proxy']):
            ha_ratio = 'nok'
            ratio_reason.append(str(format(ratio, '.2f')))
    except:
        LOG.exception()
        ha_status = 'fail'
        ha_ratio = 'fail'

    return ha_status, ha_ratio, list_reason, ratio_reason
Example #12
0
def get_grade(item, value):
    """Map a measured value to an alarm grade.

    ``CONF.alarm()[item]`` must unpack into the three thresholds
    ``(critical, major, minor)``.

    :param item: key of the threshold triple in ``CONF.alarm()``.
    :param value: measured value; the string ``'-1'`` marks a failed
                  measurement.
    :return: ``'fail'`` for ``'-1'``; otherwise the first of ``'critical'``,
             ``'major'``, ``'minor'`` whose threshold is reached, or
             ``'normal'`` when none is.
    """
    critical, major, minor = (CONF.alarm()[item])

    if value == '-1':
        return 'fail'

    # Thresholds are tested from most to least severe.
    for grade, threshold in (('critical', critical),
                             ('major', major),
                             ('minor', minor)):
        if float(value) >= float(threshold):
            return grade

    return 'normal'
Example #13
0
    def run(self):
        """Daemon main routine.

        Initialises the DB, starts the REST server (exiting with status 1 if
        that fails) and then either idles forever, when the configured
        watchdog interval is 0, or runs ``watchdog.periodic`` in an endless
        loop.  Any exception in that loop pushes a 'disconnect' event, closes
        the DB connection and exits with status 1.
        """
        # DB initiation
        DB.db_initiation()

        # Start RESTful server
        try:
            REST_SVR.rest_server_start()
        except:
            print('Rest Server failed to start')
            LOG.exception()
            sys.exit(1)

        # Monitoring disabled: keep the process alive without doing work.
        if CONF.watchdog()['interval'] == 0:
            LOG.info("--- Not running periodic monitoring ---")
            while True:
                time.sleep(3600)

        # Periodic monitoring
        LOG.info("--- Periodic Monitoring Start ---")

        conn = DB.connection()

        while True:
            try:
                watchdog.periodic(conn)
                time.sleep(CONF.watchdog()['interval'])
            except:
                shutdown_time = str(datetime.now())
                watchdog.push_event('sonawatcher', 'disconnect', 'critical',
                                    'sonawatcher server shutdown',
                                    shutdown_time)
                conn.close()
                LOG.exception()
                sys.exit(1)
Example #14
0
def onos_app_check(node):
    """Check that all required ONOS applications are active on *node*.

    Runs ``apps -a -s`` on the node through ``SshCommand.onos_ssh_exec`` and
    takes, from every output line, the first word of the third
    dot-separated field as the application name.

    :param node: ONOS node to query.
    :return: ``'ok'`` when every entry of ``CONF.onos()['app_list']`` is in
             the active list; ``'nok'`` otherwise or when the command
             returned nothing.
    """
    app_rt = SshCommand.onos_ssh_exec(node, 'apps -a -s')

    if app_rt is not None:
        active_apps = [row.split(".")[2].split()[0]
                       for row in app_rt.splitlines()]
        if set(CONF.onos()['app_list']).issubset(active_apps):
            return 'ok'

    LOG.error("\'%s\' Application Check Error", node)
    return 'nok'
Example #15
0
def send_response_traffic_test_old(cond, auth):
    """Run the legacy traffic test and POST its result to the requester.

    The result dict (``result``, optional ``traffic_test_result`` and
    ``transaction_id``) is sent as JSON with ``curl`` to
    ``cond['app_rest_url']`` using ``CONF.onos()['rest_auth']``.

    :param cond: request conditions; passed whole to
                 ``trace.traffic_test_old`` and read for ``transaction_id``
                 and ``app_rest_url``.
    :param auth: authorization value of the incoming request.  It is not
                 used for the outgoing call; kept for interface
                 compatibility.
    :return: None.  All errors are logged and swallowed.
    """
    trace_result_data = {}

    try:
        is_success, result = trace.traffic_test_old(cond)

        trace_result_data['result'] = 'SUCCESS' if is_success else 'FAIL'

        if result is not None:
            trace_result_data['traffic_test_result'] = result

        trace_result_data['transaction_id'] = cond['transaction_id']

        try:
            LOG.info('%s',
                     json.dumps(trace_result_data, sort_keys=True, indent=4))
        except Exception:
            # Pretty-printing is only for the log; record the problem but
            # still try to send the reply.
            LOG.exception()

        req_body_json = json.dumps(trace_result_data)

        try:
            url = str(cond['app_rest_url'])

            # The URL and body originate from the request, so they are passed
            # as separate arguments without a shell: characters such as
            # '&', ';' or quotes can no longer alter the command.
            curl_args = [
                'curl', '-X', 'POST',
                '-u', CONF.onos()['rest_auth'],
                '-H', 'Content-Type: application/json',
                '-d', str(req_body_json),
                url,
            ]
            # NOTE(review): this line logs the REST credentials, as the
            # original did -- consider masking them.
            LOG.error('%s', 'curl = ' + ' '.join(curl_args))

            proc = Popen(curl_args, stdout=PIPE, stderr=PIPE)
            proc.communicate()
            # A non-zero exit status is deliberately ignored: the receiver
            # of the push notification may simply not respond.
        except Exception:
            LOG.exception()

    except Exception:
        LOG.exception()
Example #16
0
    def exit(self):
        """Terminate the daemon's process group.

        Reads the PID from ``CONF.get_pid_file()`` and sends ``SIGTERM`` to
        that process group.  When the process no longer exists, the stale
        pid file ``self.pidfile`` is removed.  All errors are logged.
        """
        import errno

        try:
            # 'with' closes the file even when the content is not a number.
            with open(CONF.get_pid_file(), 'r') as pf:
                pid = int(pf.read().strip())

            LOG.info("--- Daemon STOP [fail to check rest server] ---")

            try:
                LOG.info('PID = ' + str(pid))
                os.killpg(pid, SIGTERM)
            except OSError as err:
                # ESRCH is "No such process"; checking the error number does
                # not depend on the wording of the system message.
                if err.errno == errno.ESRCH:
                    if os.path.exists(self.pidfile):
                        os.remove(self.pidfile)
                else:
                    LOG.exception()
        except Exception:
            LOG.exception()
Example #17
0
def tperf_test_run(perf_conditions):
    """Run a traffic performance test and report the result to ONOS.

    Creates the server/client test instances, runs the test between them,
    POSTs the JSON result to ``perf_conditions['app_rest_url']`` and deletes
    the test instances again.

    :param perf_conditions: dict read for the keys ``server``, ``client``,
                            ``test_options``, ``transaction_id`` and
                            ``app_rest_url``.
    :return: None.  Errors inside the test sequence are logged and swallowed.
    """
    tperf_result = dict()
    request_headers = {
        'Authorization': CONF.onos()['rest_auth'],
        'Accept': 'application/json',
        'Content-Type': 'application/json'
    }

    try:
        # 1. create instance
        LOG.info("[T-perf server/client VM create] --- ")
        server_vm, client_vm, client_floatingip = traffic_test.create_instance(
            perf_conditions['server'], perf_conditions['client'])

        try:
            # 2. run performance test
            if server_vm and client_vm:
                # First address of the first network listed for the server VM.
                server_addr = list(
                    server_vm.__dict__['addresses'].values())[0][0]['addr']
                tperf_result = traffic_test.tperf_command_exec(
                    server_addr, client_floatingip.ip,
                    perf_conditions['test_options'])
            else:
                tperf_result.update({
                    'result': 'FAIL',
                    'fail_reason': 'Fail to create instance.'
                })

            tperf_result.update(
                {'transaction_id': perf_conditions['transaction_id']})

            LOG.info("[Traffic Performance Test] Return Result = %s",
                     json.dumps(tperf_result))

            # send tperf test result to ONOS
            response = requests.post(perf_conditions['app_rest_url'],
                                     data=str(json.dumps(tperf_result)),
                                     headers=request_headers)
            LOG.info("[Tperf Result Send] Response = %s %s",
                     response.status_code, response.reason)
        finally:
            # delete tperf test instance -- also when the test or the result
            # upload failed, so the instances are not left behind.
            traffic_test.delete_test_instance(server_vm, client_vm,
                                              client_floatingip)

    except Exception:
        LOG.exception()
Example #18
0
def onos_rest_check(conn, db_log, node_name, node_ip):
    """Check the ONOS web bundles listed by ``web:list`` and store the result.

    Every bundle named in ``CONF.onos()['rest_list']`` is looked up in the
    command output and is healthy when its line contains both ``Active`` and
    ``Deployed``.  Bundles not in that list are recorded too, flagged with
    ``monitor_item: False``; they do not affect the returned status.

    :param conn: DB connection handed to ``DB.sql_execute``.
    :param db_log: logger object exposing ``write_log``.
    :param node_name: node whose row in ``DB.ONOS_TBL`` is updated.
    :param node_ip: address used for the SSH command.
    :return: ``(web_status, fail_reason)`` with status ``'ok'``, ``'nok'``
             or ``'fail'``.
    """
    try:
        web_status = 'ok'

        web_list = []
        fail_reason = []

        web_rt = SshCommand.onos_ssh_exec(node_ip, 'web:list')

        if web_rt is None:
            LOG.error("\'%s\' ONOS Rest Check Error", node_ip)
            web_status = 'fail'
            web_list = 'fail'
        else:
            # Pass 1: bundles that are monitored.
            for web in CONF.onos()['rest_list']:
                for line in web_rt.splitlines():
                    if line.startswith(('ID', '--')):
                        continue

                    if ' ' + web + ' ' not in line:
                        continue

                    healthy = 'Active' in line and 'Deployed' in line
                    rest_json = {
                        'name': web,
                        'status': 'ok' if healthy else 'nok',
                        'monitor_item': True
                    }
                    if not healthy:
                        fail_reason.append(rest_json)
                        web_status = 'nok'

                    web_list.append(rest_json)

            # Pass 2: every other bundle, recorded for information only.
            for line in web_rt.splitlines():
                if line.startswith(('ID', '--')):
                    continue

                # The bundle name is the 11th whitespace-separated column.
                name = line.split()[10]

                if name in CONF.onos()['rest_list']:
                    continue

                healthy = 'Active' in line and 'Deployed' in line
                web_list.append({
                    'name': name,
                    'status': 'ok' if healthy else 'nok',
                    'monitor_item': False
                })

        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET weblist = \"' + str(web_list) + '\"' +\
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE ONOS REST INFO -----\n' + sql)

            update_result = DB.sql_execute(sql, conn)
            if update_result != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS REST DB Update Fail.')
        except:
            LOG.exception()

    except:
        LOG.exception()
        web_status = 'fail'

    return web_status, fail_reason
Example #19
0
def swarm_check(conn, db_log, node_name, user_name, node_ip):
    """Check Docker swarm node and service health over SSH and store the output.

    Runs ``sudo docker node ls``, ``sudo docker service ls`` and, for every
    entry of ``CONF.swarm()['app_list']``, ``sudo docker service ps <app>``
    on *node_ip* through ``SshCommand.ssh_exec``.  The collected text of the
    three commands is written to ``DB.SWARM_TBL`` for *node_name*.

    :param conn: DB connection handed to ``DB.sql_execute``.
    :param db_log: logger object exposing ``write_log``.
    :param node_name: node whose row in ``DB.SWARM_TBL`` is updated.
    :param user_name: SSH user.
    :param node_ip: SSH target address.
    :return: ``(ret_app, ret_node)``, each ``'ok'`` or ``'nok'``.
    """
    str_node = ''
    str_service = ''
    str_ps = ''

    ret_app = 'ok'
    ret_node = 'ok'

    # --- 1. swarm nodes ---------------------------------------------------
    node_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker node ls')

    if node_rt is not None:
        try:
            leader_flag = False
            for line in node_rt.splitlines():
                line = line.decode('utf-8')
                # The break statements below also stop collecting str_node.
                str_node = str_node + line + '\n'

                # Skip the header row.
                if line.startswith('ID'):
                    continue

                if 'Leader' in line:
                    leader_flag = True

                    # The leader must be both Ready and Active.
                    if not ('Ready' in line and 'Active' in line):
                        ret_node = 'nok'
                        break

                # Any node reported as Down fails the check.
                if 'Down' in line:
                    ret_node = 'nok'
                    break

            # No leader line seen at all.
            if not leader_flag:
                ret_node = 'nok'
        except:
            LOG.exception()
            ret_node = 'nok'

    else:
        # NOTE(review): ret_node stays 'ok' here; only the stored text
        # becomes 'fail' -- confirm this is intended.
        LOG.error("\'%s\' Swarm Node Check Error", node_ip)
        str_node = 'fail'

    # --- 2. swarm services ------------------------------------------------
    service_rt = SshCommand.ssh_exec(user_name, node_ip,
                                     'sudo docker service ls')

    if service_rt is not None:
        try:
            for app in CONF.swarm()['app_list']:
                find_flag = False
                for line in service_rt.splitlines():
                    line = line.decode('utf-8')

                    # Skip the header row.
                    if line.startswith('ID'):
                        continue

                    # Expects exactly five whitespace-separated columns; any
                    # other count raises and is handled by the except below.
                    id, name, mode, rep, img = line.split()

                    if app == name:
                        find_flag = True
                        # rep is read as "<running>/<desired>".
                        rep_tmp = rep.split('/')

                        if not (rep_tmp[0] == rep_tmp[1]):
                            ret_app = 'nok'
                            # Leaves only the inner loop; the remaining apps
                            # are still checked.
                            break

                # A configured app without a service line fails the check.
                if not find_flag:
                    ret_app = 'nok'
                    break
        except:
            LOG.exception()
            ret_app = 'nok'

        # Keep the complete command output for the DB.
        for line in service_rt.splitlines():
            line = line.decode('utf-8')
            str_service = str_service + line + '\n'
    else:
        LOG.error("\'%s\' Swarm Service Check Error", node_ip)
        str_service = 'fail'
        ret_app = 'nok'

    # --- 3. per-app task list (stored only, does not affect the result) ---
    try:
        for app in CONF.swarm()['app_list']:
            ps_rt = SshCommand.ssh_exec(user_name, node_ip,
                                        'sudo docker service ps ' + app)

            str_ps = str_ps + ' * ' + app + '\n\n'

            if ps_rt is not None:
                for line in ps_rt.splitlines():
                    line = line.decode('utf-8')
                    str_ps = str_ps + line + '\n'
            else:
                LOG.error("\'%s\' Swarm PS Check Error", node_ip)
                str_ps = str_ps + 'Command failure(' + app + ')\n'

            str_ps = str_ps + '\n'
    except:
        LOG.exception()

    # --- 4. persist the collected text ------------------------------------
    # NOTE(review): the values are embedded between single quotes, so a
    # single quote in the command output would break the statement.
    try:
        sql = 'UPDATE ' + DB.SWARM_TBL + \
              ' SET node = \'' + str_node + '\',' + \
              ' service = \'' + str_service + '\',' + \
              ' ps = \'' + str_ps + '\'' + \
              ' WHERE nodename = \'' + node_name + '\''
        db_log.write_log('----- UPDATE SWARM INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] SWARN DB Update Fail.')
    except:
        LOG.exception()

    return ret_app, ret_node
Example #20
0
                    # do monitoring
                    pre_stat = watchdog.periodic(conn, pre_stat, db_log)

                    time.sleep(CONF.watchdog()['interval'])
                except:
                    alarm_event.push_event('SimpleFabricWatchd', 'PROC', 'down', 'normal', [], str(datetime.now()), False)
                    conn.close()
                    LOG.exception()


if __name__ == "__main__":

    # change to script directory for relative CONFIG_FILE path
    os.chdir(os.path.dirname(os.path.realpath(sys.argv[0])))
     
    CONF.init()
    LOG.init(CONF.base()['log_file_name'])
    history_log = USER_LOG()
    history_log.set_log('event_history.log', CONF.base()['log_rotate_time'], CONF.base()['log_backup_count'])
    alarm_event.set_history_log(history_log)

    daemon = SimpleFabricWatchD(CONF.get_pid_file())

    if len(sys.argv) == 2:

        if 'start' == sys.argv[1]:
            daemon.start()

        elif 'stop' == sys.argv[1]:
            print "Stopping ..."
            alarm_event.push_event('SimpleFabricWatchd', 'PROC', 'down', 'up', [], str(datetime.now()), True)
Example #21
0
def xos_sync_check(conn, db_log, node_name):
    """Query the XOS synchronizer diagnostics and record them in the DB.

    Each entry of ``<xos_rest_server>/api/core/diags/`` is ok when the code
    in front of the ``'-'`` in ``backend_status`` is ``'0'`` or ``'1'`` and
    the ``last_run`` timestamp in ``backend_register`` is less than 30
    seconds old.

    :param conn: DB connection handed to ``DB.sql_execute``.
    :param db_log: logger object exposing ``write_log``.
    :param node_name: node whose row in ``DB.XOS_TBL`` is updated.
    :return: ``(status, fail_reason)`` where status is ``'ok'``, ``'nok'``
             or ``'fail'``; ``('fail', None)`` when curl exits non-zero.
    """
    overall = 'ok'
    sync_entries = []
    failures = []

    try:
        rest_server = CONF.xos()['xos_rest_server']
        rest_account = CONF.xos()['xos_rest_account']

        curl_cmd = 'curl -H "Accept: application/json; indent=4" -u ' + rest_account + ' -X GET ' + rest_server + '/api/core/diags/'
        proc = Popen(curl_cmd, stdout=PIPE, stderr=PIPE, shell=True)
        stdout_data, stderr_data = proc.communicate()

        if proc.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", stderr_data)
            return 'fail', None

        for diag in json.loads(stdout_data):
            backend_status = diag['backend_status']
            LOG.info('xos_sync_backend_status = ' + backend_status)

            parts = str(backend_status).split('-')
            status = 'ok' if parts[0].strip() in ['0', '1'] else 'nok'

            # check time: whole seconds since the last recorded run
            last_run = json.loads(diag['backend_register'])['last_run']
            interval = int(time.time() - last_run)

            if interval >= 30:
                status = 'nok'

            entry = {
                'name': diag['name'],
                'status': status,
                'description': parts[1].strip(),
                'last_run_interval': interval
            }
            sync_entries.append(entry)

            if status == 'nok':
                overall = 'nok'
                failures.append(entry)

            # NOTE(review): the row is rewritten on every iteration with the
            # list accumulated so far; nothing is written for an empty reply.
            try:
                sql = 'UPDATE ' + DB.XOS_TBL + \
                      ' SET synchronizer = \"' + str(sync_entries) + '\"' + \
                      ' WHERE nodename = \'' + node_name + '\''
                db_log.write_log('----- UPDATE SYNCHRONIZER INFO -----\n' + sql)

                update_result = DB.sql_execute(sql, conn)
                if update_result != 'SUCCESS':
                    db_log.write_log('[FAIL] SYNCHRONIZER DB Update Fail.')
            except:
                LOG.exception()

    except:
        LOG.exception()
        overall = 'fail'

    return overall, failures
Example #22
0
def get_gw_ratio_gateway(conn, db_log, node_ip, node_name, rx, gw_rx_sum, pre_stat):
    """Compute the gateway traffic ratio of one node since the last run.

    Looks up the ONOS node that manages *node_ip*, reads the packet counters
    of the ONOS groups whose tunnel destination is the node's data IP,
    compares the deltas against the counters stored in *pre_stat* and writes
    the result to ``DB.OPENSTACK_TBL``.

    :param conn: DB connection (queried directly and passed to
                 ``DB.sql_execute``).
    :param db_log: logger object exposing ``write_log``.
    :param node_ip: management IP of the gateway node.
    :param node_name: node name; ``<node_name>_GW`` is the key in *pre_stat*.
    :param rx: current receive counter of the node.
    :param gw_rx_sum: current receive counter summed over the gateways.
    :param pre_stat: dict with the counters of the previous run; updated in
                     place with the current counters.
    :return: ``(status, pre_stat, reason)``.  status is ``'ok'``, ``'nok'``,
             ``'-'`` (no previous sample) or ``'fail'``; reason holds the
             ratio record when status is not ``'ok'`` and a record was
             built, and is empty otherwise.
    """
    status = 'ok'
    reason = []
    # Stays None when the function fails before a ratio record exists, so the
    # final append cannot hit an unbound name.
    json_ratio = None

    try:
        sql = 'SELECT ' + DB.ONOS_TBL + '.nodename, nodelist, ip_addr' + ' FROM ' + DB.ONOS_TBL + \
                ' INNER JOIN ' + DB.NODE_INFO_TBL + ' ON ' + DB.ONOS_TBL + '.nodename = ' + DB.NODE_INFO_TBL + '.nodename'

        nodes_info = conn.cursor().execute(sql).fetchall()

        if len(nodes_info) == 0:
            LOG.info('Fail to load onos list')
            return 'fail', pre_stat, reason

        # search data_ip
        data_ip = ''
        manage_ip = ''
        cpt_to_gw_packet = 0
        for nodename, nodelist, ip in nodes_info:
            if nodelist != 'none':
                # NOTE(review): eval() executes whatever is stored in the
                # nodelist column; if the column only ever holds a literal
                # list of dicts, ast.literal_eval would be the safe choice.
                for node_info in eval(nodelist):
                    try:
                        info = dict(node_info)
                        if info['management_ip'] == node_ip:
                            manage_ip = ip
                            data_ip = info['data_ip']
                    except Exception:
                        manage_ip = ''

                    if manage_ip != '':
                        break
            if manage_ip != '':
                break

        if data_ip == '':
            LOG.info('Can not find data ip')
            return 'fail', pre_stat, reason

        group_rt = SshCommand.onos_ssh_exec(manage_ip, 'groups')

        if group_rt is not None:
            tunnel_marker = '{tunnelDst=' + data_ip + '}'
            for line in group_rt.splitlines():
                if tunnel_marker in line:
                    for col in line.split(','):
                        if 'packets=' in col:
                            cpt_to_gw_packet = cpt_to_gw_packet + int(col.split('=')[1])

        stat_key = node_name + '_GW'

        if stat_key not in dict(pre_stat):
            # First sample for this node: nothing to compare against yet.
            status = '-'
            json_ratio = {'current_rx': '-', 'current_compute_tx': '-', 'current_total': '-',
                          'ratio': '-',
                          'period': CONF.watchdog()['interval'], 'status': status, 'packet_loss': False,
                          'description': ''}
        else:
            previous = dict(pre_stat)[stat_key]
            cur_rx = rx - int(previous['rx'])
            cur_total = gw_rx_sum - int(previous['gw_rx_sum'])
            cur_packet = cpt_to_gw_packet - int(previous['cpt_to_gw_packet'])

            if cur_rx == 0 and cur_total == 0:
                ratio = 100
            elif cur_rx <= 0 or cur_total < 0:
                ratio = 0
            else:
                ratio = float(cur_rx) * 100 / cur_total

            desc = 'GW RATIO = ' + str(ratio) + ' (' + str(cur_rx) + ' / ' + str(cur_total) + ')'

            # Fewer packets received than the groups counted as sent to
            # this gateway is treated as data loss.
            loss_flag = False
            if cur_rx < cur_packet:
                LOG.info('GW Ratio Fail. (Data loss)')
                loss_flag = True

            LOG.info('GW Ratio = ' + str(ratio))

            if ratio < float(CONF.alarm()['gw_ratio']) or cur_rx < cur_packet:
                status = 'nok'

            json_ratio = {'current_rx': cur_rx, 'current_compute_tx': cur_packet, 'current_total': cur_total, 'ratio': format(ratio, '.2f'),
                          'period': CONF.watchdog()['interval'], 'status': status, 'packet_loss': loss_flag, 'description': desc}

        # A failed DB update is logged but does not change the result.
        try:
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET gw_ratio = \"' + str(json_ratio) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE TRAFFIC GW INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] TRAFFIC GW DB Update Fail.')
        except Exception:
            LOG.exception()

        # Remember the current counters for the next run.
        in_out_dic = dict()
        in_out_dic['rx'] = rx
        in_out_dic['gw_rx_sum'] = gw_rx_sum
        in_out_dic['cpt_to_gw_packet'] = cpt_to_gw_packet

        pre_stat[stat_key] = in_out_dic
    except Exception:
        LOG.exception()
        status = 'fail'

    if status != 'ok' and json_ratio is not None:
        reason.append(json_ratio)

    return status, pre_stat, reason
Example #23
0
def onos_check(conn, db_log, node_name, node_ip):
    """Check cluster nodes, devices, links and applications of one ONOS node.

    Called on each ONOS node in NODE_INFO_TBL.  Each of the four areas is
    fetched with ``onos_api_req`` and compared with the items configured in
    ``CONF.onos()`` ('list', 'device_list', 'link_list', 'app_list').  Items
    that are configured are tagged ``monitor_item = True``; items reported by
    ONOS but not configured are appended with ``monitor_item = False``.  The
    four resulting lists are written to ``DB.ONOS_TBL`` for ``node_name``.

    Status values: 'ok', 'nok' (a monitored item is missing or down) and
    'fail' (the check itself could not be carried out).

    :param conn: DB connection handed to ``DB.sql_execute``.
    :param db_log: logger whose ``write_log`` records the SQL statement.
    :param node_name: node name used in the UPDATE's WHERE clause.
    :param node_ip: address handed to ``onos_api_req``.
    :return: ``(node_status, device_status, link_status, app_status,
        node_fail_reason, device_fail_reason, link_fail_reason,
        app_fail_reason)``.
    """
    # Bind every returned name before the try block so that the final return
    # can never hit an unbound variable, whichever step raises.
    node_status = 'ok'
    device_status = 'ok'
    link_status = 'ok'
    app_status = 'ok'
    node_fail_reason = []
    device_fail_reason = []
    link_fail_reason = []
    app_fail_reason = []
    node_list = []
    device_list = []
    link_list = []
    app_list = []

    try:
        # check cluster nodes
        ret, rsp = onos_api_req(node_ip, 'onos/v1/cluster')
        if rsp is not None:
            try:
                node_tbl = dict()
                for node in rsp['nodes']:
                    node_tbl[node['ip']] = node

                for onos_node in CONF.onos()['list']:
                    fields = onos_node.split(':')
                    if len(fields) != 2:
                        continue
                    node_id, ip = fields
                    # compare by value; identity ("is") on strings is not
                    # guaranteed to hold
                    if node_id == '' or ip == '':
                        continue
                    if ip in node_tbl:
                        node = node_tbl.pop(ip)
                        node['id'] = node_id
                        node['monitor_item'] = True
                        if node['status'] != 'READY':
                            node_status = 'nok'
                            node_fail_reason.append('Node ' + node_id + ' DOWN')
                    else:
                        node = {
                            'id': node_id,
                            'ip': ip,
                            'status': 'nok',
                            'monitor_item': True
                        }
                        node_status = 'nok'
                        node_fail_reason.append('Node ' + node_id + ' DOWN')
                    node_list.append(node)

                # whatever is left was reported by ONOS but is not configured
                for node in node_tbl.values():
                    node['monitor_item'] = False
                    node_list.append(node)

            except Exception:
                LOG.exception()
                LOG.error("\'%s\' ONOS Check Error(nodes)", node_ip)
                node_status = 'fail'
        else:
            # no usable answer: same handling as the device and app checks
            LOG.error("\'%s\' ONOS Check Error(nodes)", node_ip)
            node_status = 'fail'

        # check devices
        ret, rsp = onos_api_req(node_ip, 'onos/v1/devices')
        if rsp is not None:
            try:
                device_tbl = dict()
                for device in rsp['devices']:
                    device['id'] = 'of:' + device['chassisId'].rjust(16, '0')
                    device_tbl[device['id']] = device

                for device_id in CONF.onos()['device_list']:
                    if device_id == '':
                        # no config
                        continue
                    if device_id in device_tbl:
                        device = device_tbl.pop(device_id)
                        device['monitor_item'] = True
                        if not device['available']:
                            device_status = 'nok'
                            device_fail_reason.append(
                                'Device ' + device_id + ' DOWN')
                    else:
                        device = {
                            'id': device_id,
                            'available': False,
                            'channelId': '-',
                            'name': '-',
                            'role': '-',
                            'monitor_item': True
                        }
                        device_status = 'nok'
                        device_fail_reason.append(
                            'Device ' + device_id + ' DOWN')
                    device_list.append(device)

                for device in device_tbl.values():
                    device['monitor_item'] = False
                    device_list.append(device)

            except Exception:
                LOG.exception()
                LOG.error("\'%s\' ONOS Check Error(devices)", node_ip)
                device_status = 'fail'
        else:
            LOG.error("\'%s\' ONOS Check Error(devices)", node_ip)
            device_status = 'fail'

        # check links
        ret, rsp = onos_api_req(node_ip, 'onos/v1/links')
        if rsp is not None:
            try:
                link_tbl = dict()
                for link in rsp['links']:
                    link['src'] = link['src']['device'] + '/' + \
                        link['src']['port']
                    link['dst'] = link['dst']['device'] + '/' + \
                        link['dst']['port']
                    link_tbl[link['src'] + '-' + link['dst']] = link

                for link_id in CONF.onos()['link_list']:
                    if link_id == '':
                        continue
                    ends = link_id.split('-')
                    if len(ends) != 2:
                        link_list.append({
                            'src': link_id,
                            'dst': '(invalid_link_config)',
                            'expected': 'false',
                            'state': '-',
                            'type': "-",
                            'monitor_item': True
                        })
                        link_status = 'nok'
                        link_fail_reason.append(
                            'Link ' + link_id +
                            ' is configed as INVALID ID FORMAT')
                        continue

                    # a configured link is expected in both directions:
                    # first as written, then reversed
                    for src, dst in (ends, ends[::-1]):
                        key = src + '-' + dst
                        if key in link_tbl:
                            link = link_tbl[key]
                            link['monitor_item'] = True
                            if link['state'] != 'ACTIVE':
                                link_status = 'nok'
                                link_fail_reason.append(
                                    'Link ' + key + ' DOWN')
                            link_list.append(link)
                            link_tbl.pop(key)
                        else:
                            link_list.append({
                                'src': src,
                                'dst': dst,
                                'expected': 'false',
                                'state': '-',
                                'type': "-",
                                'monitor_item': True
                            })
                            link_status = 'nok'
                            link_fail_reason.append('Link ' + key + ' DOWN')

                for link in link_tbl.values():
                    link['monitor_item'] = False
                    link_list.append(link)

            except Exception:
                LOG.exception()
                LOG.error("\'%s\' ONOS Check Error(links)", node_ip)
                link_status = 'fail'
        else:
            # no usable answer: same handling as the device and app checks
            LOG.error("\'%s\' ONOS Check Error(links)", node_ip)
            link_status = 'fail'

        # check apps
        ret, rsp = onos_api_req(node_ip, 'onos/v1/applications')
        if rsp is not None:
            try:
                active_app_list = []
                for app_rsp in rsp['applications']:
                    if app_rsp['state'] == 'ACTIVE':
                        active_app_list.append(app_rsp['name'].replace(
                            'org.onosproject.', ''))

                for app in CONF.onos()['app_list']:
                    if app in active_app_list:
                        app_json = {
                            'name': app,
                            'status': 'ok',
                            'monitor_item': True
                        }
                        active_app_list.remove(app)
                    else:
                        app_json = {
                            'name': app,
                            'status': 'nok',
                            'monitor_item': True
                        }
                        app_status = 'nok'
                        app_fail_reason.append(app_json)
                    app_list.append(app_json)

                # active but not configured applications
                for app in active_app_list:
                    app_list.append({
                        'name': app,
                        'status': 'ok',
                        'monitor_item': False
                    })

            except Exception:
                LOG.exception()
                LOG.error("\'%s\' ONOS Check Error(apps)", node_ip)
                app_status = 'fail'
        else:
            LOG.error("\'%s\' ONOS Check Error(apps)", node_ip)
            # this branch reports the application check, not the link check
            app_status = 'fail'

        # store to db
        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET ' + \
                  ' cluster = \"' + str(node_list) + '\",' \
                  ' device = \"' + str(device_list) + '\",' \
                  ' link = \"' + str(link_list) + '\",' \
                  ' app = \"' + str(app_list) + '\"' \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE ONOS CONNECTION INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS CONNECTION DB Update Fail.')
        except Exception:
            LOG.exception()

    except Exception:
        LOG.exception()
        node_status = 'fail'
        device_status = 'fail'
        link_status = 'fail'
        app_status = 'fail'

    return node_status, device_status, link_status, app_status, node_fail_reason, device_fail_reason, link_fail_reason, app_fail_reason
Example #24
0
    def run(self):
        """Daemon body: start the services, then monitor periodically.

        Sets up the 'db.log' user log, initialises the DB and starts the REST
        server.  When the configured watchdog interval is 0 the method only
        sleeps forever.  Otherwise every cycle first probes the REST server's
        ``/alive-check`` URL with curl (up to three attempts); after the third
        failed attempt an alarm event is pushed, the DB connection is closed,
        ``self.exit()`` is called and the loop ends.  When the probe succeeds
        ``watchdog.periodic`` is run and the method sleeps for the interval.
        """
        db_log = USER_LOG()
        db_log.set_log('db.log',
                       CONF.base()['log_rotate_time'],
                       CONF.base()['log_backup_count'])

        pre_stat = {}

        # Prepare the database
        DB.db_initiation(db_log)

        # Bring up the RESTful server
        try:
            REST_SVR.rest_server_start()
        except:
            print('Rest Server failed to start')
            LOG.exception()
            self.exit()

        # Interval 0 disables monitoring; just keep the process alive.
        if CONF.watchdog()['interval'] == 0:
            LOG.info("--- Not running periodic monitoring ---")
            while True:
                time.sleep(3600)

        LOG.info("--- Periodic Monitoring Start ---")
        history_log.write_log("--- Event History Start ---")

        conn = DB.connection()

        stop_requested = False
        while True:
            try:
                # Probe the REST server, three attempts at most.
                for attempt in (1, 2, 3):
                    try:
                        host_ip = socket.gethostbyname(socket.gethostname())
                        port = str(CONF.rest()['rest_server_port'])
                        url = 'http://' + host_ip + ':' + port + '/alive-check'

                        cmd = 'curl -X GET \"' + url + '\"'
                        LOG.info('cmd = ' + cmd)
                        probe = Popen(cmd, stdout=PIPE, stderr=PIPE,
                                      shell=True)
                        probe.communicate()

                        if probe.returncode == 0:
                            break

                        LOG.info('REST SERVER CHECK FAIL [' + str(attempt) +
                                 ']')

                        if attempt == 3:
                            LOG.info('fail to check rest server.')
                            alarm_event.push_event(
                                'sonawatcher',
                                'SONAWATCHER_DISCONNECT', 'critical',
                                'normal',
                                'sonawatcher server shutdown',
                                str(datetime.now()))
                            conn.close()
                            stop_requested = True
                            self.exit()
                            break

                    except:
                        # a failed attempt is only logged; the next one follows
                        LOG.exception()

                if stop_requested:
                    break

                pre_stat = watchdog.periodic(conn, pre_stat, db_log)

                time.sleep(CONF.watchdog()['interval'])
            except:
                # NOTE(review): the connection is closed here but the loop
                # keeps running and hands the closed connection to
                # watchdog.periodic on the next cycle - confirm whether the
                # loop should stop or reconnect.
                alarm_event.push_event('sonawatcher',
                                       'SONAWATCHER_DISCONNECT',
                                       'critical', 'normal',
                                       'sonawatcher server shutdown',
                                       str(datetime.now()))
                conn.close()
                LOG.exception()
Example #25
0
# Copyright (c) 2017 by Telcoware
# All Rights Reserved.
# SONA Monitoring Solutions.

import sys
import time

import monitor.watchdog as watchdog
import api.rest_server as REST_SVR
from api.config import CONF
from api.sona_log import LOG
from api.watcherdb import DB
from daemon import Daemon
from datetime import datetime

# Value returned by CONF.get_pid_file(); presumably the path of the daemon's
# PID file - confirm against api.config.
PIDFILE = CONF.get_pid_file()


class SonaWatchD(Daemon):
    """Daemon subclass whose body initialises the DB and the REST server."""

    def run(self):
        """Initialise the DB, then start the REST server.

        When the REST server cannot be started the failure is printed and
        logged and the process exits with status 1.
        """
        # Prepare the database first
        DB.db_initiation()

        # Then bring up the RESTful server
        try:
            REST_SVR.rest_server_start()
        except:
            print('Rest Server failed to start')
            LOG.exception()
            sys.exit(1)
Example #26
0
import socket
import requests
from subprocess import Popen, PIPE

import monitor.alarm_event as alarm_event
import monitor.watchdog as watchdog
import api.rest_server as REST_SVR
from api.config import CONF
from api.sona_log import LOG
from api.sona_log import USER_LOG
from api.watcherdb import DB
from daemon import Daemon
from datetime import datetime
from signal import SIGTERM

# Value returned by CONF.get_pid_file(); SonaWatchD.exit() below opens it as a
# file and reads the daemon's PID from it.
PIDFILE = CONF.get_pid_file()


# Daemon subclass; only the beginning of its exit() method is present in this
# excerpt.
class SonaWatchD(Daemon):
    # Stop the daemon: read the PID stored in PIDFILE and send SIGTERM to the
    # process group with that id.
    def exit(self):
        try:
            # PIDFILE holds the PID as text; surrounding whitespace is stripped
            # before the integer conversion.
            pf = file(PIDFILE, 'r')
            pid = int(pf.read().strip())
            pf.close()

            LOG.info("--- Daemon STOP [fail to check rest server] ---")

            try:
                LOG.info('PID = ' + str(pid))
                # NOTE(review): `os` is not among the imports shown with this
                # excerpt - confirm it is imported elsewhere in the module.
                os.killpg(pid, SIGTERM)
            # NOTE(review): the body of this handler is not present in this
            # excerpt.
            except OSError, err:
Example #27
0
def vrouter_check(conn, db_log, node_name, user_name, node_ip):
    """Check docker containers, ONOS apps and routes of a vRouter node.

    Runs ``sudo docker ps`` on the node over SSH and evaluates every name in
    ``CONF.openstack()['docker_list']``.  The container whose second column
    contains 'onos' is then used to find the ONOS address, from which the
    active applications (``apps -a -s``) and the routing table (``routes``)
    are read.  The three resulting lists are stored in ``DB.OPENSTACK_TBL``
    for ``node_name``.

    Output lines that do not have the expected number of fields are skipped,
    so one blank or odd line no longer aborts the whole scan.

    :param conn: DB connection handed to ``DB.sql_execute``.
    :param db_log: logger whose ``write_log`` records the SQL statement.
    :param node_name: node name used in the UPDATE's WHERE clause.
    :param user_name: SSH user.
    :param node_ip: SSH target address.
    :return: ``(status, fail_list)`` where status is 'ok', 'nok' or 'fail' and
        ``fail_list`` holds the entries that were reported 'nok'.
    """
    ret_docker = 'ok'

    docker_list = []
    fail_list = []

    onos_id = ''

    docker_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker ps')

    if docker_rt is not None:
        try:
            for docker in CONF.openstack()['docker_list']:
                for line in docker_rt.splitlines():
                    if line.startswith('CONTAINER'):
                        # header row
                        continue

                    if ' ' + docker in line:
                        if 'Up' not in line:
                            docker_json = {'name': docker, 'status': 'nok', 'type': 'docker'}
                            fail_list.append(docker_json)
                            ret_docker = 'nok'
                        else:
                            docker_json = {'name': docker, 'status': 'ok', 'type': 'docker'}

                        docker_list.append(docker_json)

                    # first column is taken as the container id, second one
                    # is searched for 'onos'; lines with fewer than two
                    # columns cannot name the ONOS container
                    columns = line.split()
                    if len(columns) > 1 and 'onos' in columns[1]:
                        onos_id = columns[0]
        except Exception:
            LOG.exception()
    else:
        LOG.error("\'%s\' Vrouter Node Check Error", node_ip)
        ret_docker = 'fail'

    onos_app_list = []
    route_list = []

    if not onos_id == '':
        try:
            # get onos container ip
            onos_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker inspect ' + onos_id + ' | grep IPAddress')

            if onos_rt is not None:
                onos_ip = None
                for line in onos_rt.splitlines():
                    line = line.strip()
                    if line.startswith('\"IPAddress'):
                        onos_ip = line.split(':')[1].strip().replace('\"', '').replace(',', '')
                        break

                if onos_ip is None:
                    # say so explicitly rather than failing later on an
                    # unbound name
                    LOG.error("\'%s\' Vrouter Node Check Error(onos ip)", node_ip)
                else:
                    app_list = SshCommand.ssh_pexpect(user_name, node_ip, onos_ip, 'apps -a -s')

                    app_active_list = list()
                    for line in app_list.splitlines():
                        if line.startswith('fail'):
                            continue

                        # the app name is the first word of the third
                        # dot-separated field; skip lines without it
                        dotted = line.split(".")
                        if len(dotted) < 3:
                            continue
                        words = dotted[2].split()
                        if not words:
                            continue
                        app_active_list.append(words[0])

                    for app in CONF.openstack()['onos_vrouter_app_list']:
                        if app in app_active_list:
                            app_json = {'name': app, 'status': 'ok', 'type': 'onos_app'}
                        else:
                            app_json = {'name': app, 'status': 'nok', 'type': 'onos_app'}
                            fail_list.append(app_json)
                            ret_docker = 'nok'

                        onos_app_list.append(app_json)

                    str_route = SshCommand.ssh_pexpect(user_name, node_ip, onos_ip, 'routes')

                    for line in str_route.splitlines():
                        line = line.strip()

                        if line.startswith(('Table', 'Network', 'Total', 'fail')):
                            continue

                        # a route row needs at least network and next hop
                        fields = line.split()
                        if len(fields) < 2:
                            continue

                        route_list.append({'network': fields[0], 'next_hop': fields[1]})
        except Exception:
            LOG.exception()
    else:
        LOG.info('can not find onos_id.')
        ret_docker = 'fail'

    try:
        sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
              ' SET docker = \"' + str(docker_list) + '\",' + \
              ' onosApp = \"' + str(onos_app_list) + '\",' + \
              ' routingTable = \"' + str(route_list) + '\"' + \
              ' WHERE nodename = \'' + node_name + '\''
        db_log.write_log('----- UPDATE GATEWAY INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] GATEWAY DB Update Fail.')
    except Exception:
        LOG.exception()

    return ret_docker, fail_list
Example #28
0
def get_internal_traffic(conn, db_log, node_name, node_ip, user_name, sub_type, rx_count, patch_tx, pre_stat):
    """Compute the in/out packet ratio inside a node for the last period.

    For ``sub_type == 'COMPUTE'`` the counters are read from
    ``ovs-ofctl dump-flows br-int`` over SSH; for any other sub type
    ``rx_count`` and ``patch_tx`` are used directly (``patch_tx == -1`` marks
    a failed reading).  The counters are compared with the sample stored
    under ``node_name + '_internal'`` in ``pre_stat``; the ratio
    ``out / in * 100`` is checked against
    ``CONF.alarm()['internal_traffic_ratio']``.  The result record is written
    to ``DB.OPENSTACK_TBL`` and the current counters replace the stored
    sample.

    :param conn: DB connection handed to ``DB.sql_execute``.
    :param db_log: logger whose ``write_log`` records the SQL statement.
    :param node_name: node name (DB key and ``pre_stat`` key prefix).
    :param node_ip: SSH target address.
    :param user_name: SSH user.
    :param sub_type: 'COMPUTE' or another node sub type.
    :param rx_count: received packet counter supplied by the caller.
    :param patch_tx: patch port tx counter supplied by the caller, -1 on error.
    :param pre_stat: dict of previous samples; updated in place.
    :return: ``(status, pre_stat, reason_list)``; status is 'ok', 'nok',
        'fail' or '-' (no previous sample), ``reason_list`` holds the result
        record whenever status is not 'ok'.
    """
    status = 'ok'
    reason_list = []
    # Stays None only when an exception prevented building the record.
    vxlan_json = None

    try:
        in_packet = 0
        out_packet = 0
        desc = ''

        if sub_type == 'COMPUTE':
            flow_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo ovs-ofctl -O OpenFlow13 dump-flows br-int')

            inport_cnt = 0
            gw_cnt = 0
            output_cnt = 0

            if flow_rt is not None:
                for line in flow_rt.splitlines():
                    tmp = line.split(',')
                    if 'in_port' in line:
                        inport_cnt = inport_cnt + int(tmp[3].split('=')[1])
                    elif 'output' in line:
                        output_cnt = output_cnt + int(tmp[3].split('=')[1])
                    elif 'actions=group' in line:
                        gw_cnt = gw_cnt + int(tmp[3].split('=')[1])

                in_packet = inport_cnt + rx_count
                out_packet = gw_cnt + output_cnt

                port_json = {'vm_tx': inport_cnt, 'vxlan_rx': rx_count, 'out_gw': gw_cnt, 'output': output_cnt}
            else:
                port_json = {'vm_tx': -1, 'vxlan_rx': -1, 'out_gw': -1, 'output': -1}
                status = 'fail'

        else:
            port_json = {'vxlan_rx': rx_count, 'patch-integ': patch_tx}

            if patch_tx == -1:
                status = 'fail'
            else:
                in_packet = rx_count
                out_packet = patch_tx

        # absolute counters, kept as the baseline for the next period
        for_save_in = in_packet
        for_save_out = out_packet

        stat_key = node_name + '_internal'
        has_baseline = stat_key in pre_stat

        if has_baseline and status == 'ok':
            in_packet = in_packet - int(pre_stat[stat_key]['in_packet'])
            out_packet = out_packet - int(pre_stat[stat_key]['out_packet'])

            if in_packet == 0 and out_packet == 0:
                ratio = 100
            elif in_packet <= 0 or out_packet < 0:
                LOG.info('Internal Traffic Ratio Fail.')
                ratio = 0
            else:
                ratio = float(out_packet) * 100 / in_packet

            LOG.info('Internal Traffic Ratio = ' + str(ratio))
            desc = 'Internal Traffic Ratio = ' + str(ratio) + '(' + str(out_packet) + '/' + str(in_packet) + ')'

            if ratio < float(CONF.alarm()['internal_traffic_ratio']):
                status = 'nok'

            vxlan_json = {'port_stat_in_out': port_json, 'period': CONF.watchdog()['interval'],
                          'ratio': format(ratio, '.2f'), 'current_rx': in_packet, 'current_tx': out_packet,
                          'description': desc, 'threshold': CONF.alarm()['internal_traffic_ratio'], 'status': status}
        else:
            # No ratio can be computed: either there is no previous sample
            # (status '-') or the counters could not be read (status 'fail').
            # A record is built in both cases so it can be stored and
            # reported.
            if not has_baseline:
                status = '-'
            vxlan_json = {'port_stat_in_out': port_json, 'period': CONF.watchdog()['interval'],
                          'ratio': 0, 'current_rx': -1, 'current_tx': -1,
                          'description': desc, 'threshold': CONF.alarm()['internal_traffic_ratio'], 'status': status}

        in_out_dic = dict()
        in_out_dic['in_packet'] = for_save_in
        in_out_dic['out_packet'] = for_save_out
        pre_stat[stat_key] = in_out_dic

        try:
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET internal_traffic = \"' + str(vxlan_json) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE INTERNAL TRAFFIC INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] INTERNAL TRAFFIC DB Update Fail.')
        except Exception:
            LOG.exception()
    except Exception:
        LOG.exception()
        status = 'fail'

    if not status == 'ok':
        if vxlan_json is None:
            # the check raised before a record existed; report a minimal one
            vxlan_json = {'ratio': 0, 'current_rx': -1, 'current_tx': -1,
                          'description': '', 'status': status}
        reason_list.append(vxlan_json)

    return status, pre_stat, reason_list
Example #29
0
def get_node_traffic(conn, db_log, node_name, rx_dic, tx_dic, total_rx, total_tx, err_info, pre_stat):
    """Evaluate the VXLAN traffic of a node for the last period.

    Reads the node's ``data_ip`` and the ONOS addresses from the DB, sums the
    counters of the ONOS flows whose ``tunnelDst`` is that address (the
    expected minimum rx), and compares ``total_rx`` / ``total_tx`` with the
    sample stored under ``node_name + '_VXLAN'`` in ``pre_stat``.  Status
    becomes 'nok' when the rx/tx ratio is below
    ``CONF.alarm()['node_traffic_ratio']``, when fewer packets than the
    expected minimum were received, or when a drop/error counter grew.  The
    result record is written to ``DB.OPENSTACK_TBL`` and the current counters
    replace the stored sample.

    :param conn: DB connection (cursor queries and ``DB.sql_execute``).
    :param db_log: logger whose ``write_log`` records the SQL statement.
    :param node_name: node name (DB key, ``pre_stat`` key prefix and key into
        ``rx_dic`` / ``tx_dic``).
    :param rx_dic: per-node rx values, indexed by node name.
    :param tx_dic: per-node tx values, indexed by node name.
    :param total_rx: absolute rx counter used for the ratio.
    :param total_tx: absolute tx counter used for the ratio.
    :param err_info: dict with 'rx_drop', 'rx_err', 'tx_drop', 'tx_err'.
    :param pre_stat: dict of previous samples; updated in place.
    :return: ``(status, pre_stat, reason_list)``; status is 'ok', 'nok',
        'fail' or '-' (no previous sample), ``reason_list`` holds the result
        record whenever status is not 'ok'.
    """
    status = 'ok'
    reason_list = []

    # Defaults for everything the result record needs, so the record can
    # still be built when the check below raises part-way through.
    ratio = -1
    port_json = {}
    description = ''

    try:
        pre_total_rx = total_rx
        pre_total_tx = total_tx

        # check minimum packet count
        sql = 'SELECT data_ip FROM ' + DB.OPENSTACK_TBL + ' WHERE nodename = \'' + node_name + '\''
        data_ip = conn.cursor().execute(sql).fetchone()[0]

        sql = 'SELECT ip_addr FROM ' + DB.NODE_INFO_TBL + ' WHERE type = \'ONOS\''
        nodes_info = conn.cursor().execute(sql).fetchall()

        min_rx = 0
        if len(nodes_info) == 0:
            LOG.info('Fail to load onos list')
            status = 'fail'
        else:
            for ip in nodes_info:
                flows_rt = SshCommand.onos_ssh_exec(ip[0], '\"flows --filter \'{tunnelDst=' + data_ip + '}\' --short\"')

                if flows_rt is not None:
                    for line in flows_rt.splitlines():
                        if 'tunnelDst' in line:
                            min_rx = min_rx + int(line.split(',')[2].split('=')[1])
                    # the first ONOS node that answers is enough
                    break

        stat_key = node_name + '_VXLAN'
        if stat_key not in pre_stat:
            status = '-'
            ratio = -1
        else:
            previous = pre_stat[stat_key]
            total_rx = total_rx - int(previous['total_rx'])
            total_tx = total_tx - int(previous['total_tx'])
            cur_min = min_rx - int(previous['min_rx'])

            if total_rx == 0 and total_tx == 0:
                ratio = 100
            elif total_tx <= 0 or total_rx < 0:
                # no usable denominator, or the rx counter went backwards
                LOG.info('Node Traffic Ratio Fail.')
                ratio = 0
            else:
                ratio = float(total_rx) * 100 / total_tx

        LOG.info('Node Traffic Ratio = ' + str(ratio))

        port_json = {'rx': rx_dic[node_name], 'minimum_rx': min_rx, 'rx_drop': err_info['rx_drop'], 'rx_errs': err_info['rx_err'],
                     'tx': tx_dic[node_name], 'tx_drop': err_info['tx_drop'], 'tx_errs': err_info['tx_err']}

        if not status == '-':
            description = 'Ratio of success for all nodes = ' + str(ratio) + ' (' + str(total_rx) + ' / ' + str(total_tx) + ')'

            if ratio < float(CONF.alarm()['node_traffic_ratio']):
                LOG.info('[NODE TRAFFIC] ratio nok')
                status = 'nok'

            if total_rx < cur_min:
                LOG.info('CUR_MIN_RX = ' + str(cur_min) + ', CUR_RX = ' + str(total_rx) + ', Less than rx minimum.')
                status = 'nok'

            # any growth of a drop/error counter since the last sample is nok
            for counter in ('rx_drop', 'rx_err', 'tx_drop', 'tx_err'):
                if err_info[counter] - int(pre_stat[stat_key][counter]) > 0:
                    LOG.info('[NODE TRAFFIC] ' + counter + ' nok')
                    status = 'nok'

        in_out_dic = dict()
        in_out_dic['total_rx'] = pre_total_rx
        in_out_dic['total_tx'] = pre_total_tx

        in_out_dic['min_rx'] = min_rx

        in_out_dic['rx_drop'] = err_info['rx_drop']
        in_out_dic['rx_err'] = err_info['rx_err']
        in_out_dic['tx_drop'] = err_info['tx_drop']
        in_out_dic['tx_err'] = err_info['tx_err']

        pre_stat[stat_key] = in_out_dic
    except Exception:
        LOG.exception()
        status = 'fail'

    vxlan_json = {'port_stat_vxlan': port_json, 'period': CONF.watchdog()['interval'],
                  'ratio': format(ratio, '.2f'), 'current_rx': total_rx, 'current_tx': total_tx,
                  'description': description, 'threshold': CONF.alarm()['node_traffic_ratio'], 'status': status}

    try:
        sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
              ' SET vxlan_traffic = \"' + str(vxlan_json) + '\"' + \
              ' WHERE nodename = \'' + node_name + '\''
        db_log.write_log('----- UPDATE VXLAN STAT INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] VXLAN STAT DB Update Fail.')
    except Exception:
        LOG.exception()

    if not status == 'ok':
        reason_list.append(vxlan_json)

    return status, pre_stat, reason_list
Example #30
0
def get_gw_ratio_compute(conn, db_log, node_ip, node_name, pre_stat):
    """Compute how a compute node's traffic is spread over its gateways.

    Finds the ONOS instance whose stored node list contains an entry with
    ``management_ip == node_ip``, looks up the node's ``of_id`` by hostname
    and reads the ``packets=`` counters of the ONOS ``groups`` lines that
    mention this ``of_id``.  Each counter's growth since the sample stored
    under ``node_name + '_GW'`` in ``pre_stat`` is expressed as a percentage
    of the total growth and checked against ``CONF.alarm()['gw_ratio']``.
    The result is written to ``DB.OPENSTACK_TBL`` and the current counters
    replace the stored sample.

    :param conn: DB connection (cursor queries and ``DB.sql_execute``).
    :param db_log: logger whose ``write_log`` records the SQL statement.
    :param node_ip: management address of the node to look up.
    :param node_name: node name (DB key and ``pre_stat`` key prefix).
    :param pre_stat: dict of previous samples; updated in place.
    :return: ``(status, pre_stat, reason)``; status is 'ok', 'nok', 'fail' or
        '-' (no comparable previous sample).  ``reason`` holds the result
        record when status is not 'ok', except for the early 'fail' returns,
        which hand back an empty list.
    """
    status = 'ok'
    reason = []
    # Stays None only when an exception prevented building the record.
    json_ratio = None

    try:
        sql = 'SELECT ' + DB.ONOS_TBL + '.nodename, nodelist, ip_addr' + ' FROM ' + DB.ONOS_TBL + \
                ' INNER JOIN ' + DB.NODE_INFO_TBL + ' ON ' + DB.ONOS_TBL + '.nodename = ' + DB.NODE_INFO_TBL + '.nodename'

        nodes_info = conn.cursor().execute(sql).fetchall()

        if len(nodes_info) == 0:
            LOG.info('Fail to load onos list')
            return 'fail', pre_stat, reason

        manage_ip = ''
        hostname = ''
        for nodename, nodelist, ip in nodes_info:
            if nodelist == 'none':
                continue

            # SECURITY NOTE(review): eval() executes whatever text is stored
            # in the nodelist column.  Kept as is because the stored format
            # is not visible here; consider ast.literal_eval if the column
            # only ever holds Python literals.
            for node_info in eval(nodelist):
                try:
                    if dict(node_info)['management_ip'] == node_ip:
                        manage_ip = ip
                        hostname = dict(node_info)['hostname']
                except Exception:
                    manage_ip = ''

                if not manage_ip == '':
                    break
            if not manage_ip == '':
                break

        if hostname == '':
            LOG.info('Can not find hostname')
            return 'fail', pre_stat, reason

        try:
            sql = 'SELECT of_id FROM ' + DB.OPENSTACK_TBL + ' WHERE hostname = \'' + str(hostname) + '\''
            LOG.info(sql)
            node_info = conn.cursor().execute(sql).fetchone()

            of_id = node_info[0]
        except Exception:
            LOG.exception()
            LOG.info('Can not find of_id')
            return 'fail', pre_stat, reason

        group_rt = SshCommand.onos_ssh_exec(manage_ip, 'groups')

        total_cnt = 0
        gw_list = []
        if group_rt is not None:
            for line in group_rt.splitlines():
                if of_id in line:
                    for col in line.split(','):
                        if 'packets=' in col:
                            packets = int(col.split('=')[1])
                            total_cnt = total_cnt + packets
                            gw_list.append(packets)

        str_ratio = ''

        stat_key = node_name + '_GW'
        baseline = pre_stat[stat_key] if stat_key in pre_stat else None

        # A stored sample with fewer counters than the current one cannot be
        # compared index by index; treat it like a missing sample so the
        # baseline is refreshed below and the next cycle can compare again.
        if baseline is None or len(baseline['gw_list']) < len(gw_list):
            status = '-'
            json_ratio = {'ratio': '-', 'status': status, 'period': CONF.watchdog()['interval']}
        else:
            for i, gw in enumerate(gw_list):
                cur_gw = gw - baseline['gw_list'][i]
                cur_total = total_cnt - baseline['gw_total']

                LOG.info('cur_gw = ' + str(cur_gw))
                LOG.info('cur_total = ' + str(cur_total))

                if cur_gw == 0 and cur_total == 0:
                    # no traffic at all: an even share, rounded down
                    ratio = 100 // len(gw_list)
                elif cur_gw <= 0 or cur_total <= 0:
                    ratio = 0
                else:
                    ratio = float(cur_gw) * 100 / cur_total

                str_ratio = str_ratio + str(ratio) + ':'

                if ratio < float(CONF.alarm()['gw_ratio']):
                    status = 'nok'

            json_ratio = {'ratio': str_ratio.rstrip(':'), 'status': status, 'period': CONF.watchdog()['interval']}
            LOG.info('[COMPUTE] ' + 'GW_RATIO = ' + str_ratio.rstrip(':'))

        try:
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET gw_ratio = \"' + str(json_ratio) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE TRAFFIC GW INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] TRAFFIC GW DB Update Fail.')
        except Exception:
            LOG.exception()

        in_out_dic = dict()
        in_out_dic['gw_list'] = gw_list
        in_out_dic['gw_total'] = total_cnt

        pre_stat[stat_key] = in_out_dic

    except Exception:
        LOG.exception()
        status = 'fail'

    if not status == 'ok':
        if json_ratio is None:
            # the check raised before a record existed; report a minimal one
            json_ratio = {'ratio': '-', 'status': status}
        reason.append(json_ratio)

    return status, pre_stat, reason