예제 #1
0
def get_node_list(nodes, param, tbl=DB.NODE_INFO_TBL, add_cond=''):
    """Select columns for one node, or for every node, from a table.

    Args:
        nodes: node name matched against the ``nodename`` column, or the
            literal ``'all'`` to skip that filter.
        param: comma-separated column list placed after ``SELECT``.
        tbl: table to query; defaults to ``DB.NODE_INFO_TBL``.
        add_cond: optional extra SQL condition ANDed onto the query.

    Returns:
        The rows returned by ``fetchall()``, or ``None`` when anything
        raised (the failure is logged).

    NOTE: ``param``, ``tbl`` and ``add_cond`` are still spliced into the SQL
    text, so they must only ever come from trusted, in-code values.
    """
    try:
        conditions = []
        args = []

        if nodes != 'all':
            # Bind the node name instead of quoting it by hand, so a name
            # containing a quote can neither break nor alter the query.
            # NOTE(review): assumes a qmark-style DB-API driver (sqlite3).
            conditions.append('nodename = ?')
            args.append(nodes)

        if add_cond != '':
            conditions.append(add_cond)

        sql = 'SELECT ' + param + ' FROM ' + tbl
        if conditions:
            sql = sql + ' WHERE ' + ' and '.join(conditions)

        with DB.connection() as conn:
            nodes_info = conn.cursor().execute(sql, args).fetchall()

        conn.close()
        return nodes_info
    except Exception:
        LOG.exception()
        return None
예제 #2
0
def net_check(node):
    """Ping *node* once and report whether it answered.

    The check only runs when the watchdog method in the configuration is
    ``'ping'``; the timeout is read from the same configuration section.

    Args:
        node: host name or IP address to ping.

    Returns:
        ``'ok'`` when ping exits with 0, ``'nok'`` when it exits non-zero or
        cannot be started, and ``None`` when the configured method is not
        ``'ping'`` or the check itself raised (the failure is logged).
    """
    try:
        if CONF.watchdog()['method'] != 'ping':
            return None

        timeout = CONF.watchdog()['timeout']
        if sys.platform == 'darwin':
            # The timeout is scaled by 1000 on macOS
            # (presumably its ping takes -W in milliseconds -- TODO confirm).
            timeout = timeout * 1000

        # Argument list, no shell: the node string is passed to ping as a
        # single argument and can never be interpreted as shell syntax.
        cmd = ['ping', '-c1', '-W%d' % timeout, '-n', str(node)]

        try:
            result = Popen(cmd, stdout=PIPE, stderr=PIPE)
        except OSError:
            # ping could not be executed at all; with the shell this showed
            # up as a non-zero exit status, so keep reporting 'nok'.
            LOG.exception()
            return 'nok'

        result.communicate()

        if result.returncode != 0:
            LOG.error("\'%s\' Network Check Error(%d) ", node, result.returncode)
            return 'nok'

        return 'ok'
    except Exception:
        LOG.exception()
        return None
예제 #3
0
def regi_url(url, auth):
    """Register a callback URL and return the currently active events.

    Args:
        url: callback URL supplied by the client.
        auth: authorization value stored next to the URL.

    Returns:
        ``{'Result': 'SUCCESS', 'Event list': [...]}`` when the URL is
        already registered or was inserted successfully, otherwise
        ``{'Result': 'FAIL'}``. The event list holds every row of the event
        table whose grade is neither ``'ok'`` nor ``'normal'``.
    """
    try:
        # The URL comes from the request, so bind it rather than quoting it.
        # NOTE(review): assumes a qmark-style DB-API driver (sqlite3).
        sql = 'SELECT * FROM ' + DB.REGI_SYS_TBL + ' WHERE url = ?'
        sql_evt = 'SELECT * FROM ' + DB.EVENT_TBL

        with DB.connection() as conn:
            url_info = conn.cursor().execute(sql, (url,)).fetchall()
            evt_list = conn.cursor().execute(sql_evt).fetchall()
        conn.close()

        event_list = []
        for nodename, item, grade, desc, evt_time in evt_list:
            if grade not in ['ok', 'normal']:
                event_list.append({
                    'event': 'occur',
                    'system': nodename,
                    'item': item,
                    'grade': grade,
                    'desc': desc,
                    'time': evt_time
                })

        # Already registered (one or more rows): nothing to insert.
        if url_info:
            return {'Result': 'SUCCESS', 'Event list': event_list}

        # DB.sql_execute() takes finished SQL text, so double any single
        # quote in the client-supplied values before embedding them.
        safe_url = url.replace('\'', '\'\'')
        safe_auth = auth.replace('\'', '\'\'')
        sql = 'INSERT INTO ' + DB.REGI_SYS_TBL + \
              ' VALUES (\'' + safe_url + '\', \'' + safe_auth + '\' )'

        if DB.sql_execute(sql) == 'SUCCESS':
            return {'Result': 'SUCCESS', 'Event list': event_list}

        return {'Result': 'FAIL'}
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
예제 #4
0
def parse_command(req_obj):
    """Run the handler registered for a request and build the response body.

    Args:
        req_obj: request mapping with the keys ``'command'`` and
            ``'system'`` and an optional ``'param'``.

    Returns:
        A dict echoing ``command``, ``system`` and ``param`` plus the
        handler's ``result`` and a ``time`` stamp, or ``{'Result': 'FAIL'}``
        when a required key is missing, the command is unknown or the
        handler raised.
    """
    try:
        command = req_obj['command']
        system = req_obj['system']

        # 'param' is optional. The handler must receive the same fallback
        # that is echoed back, instead of re-reading the missing key.
        try:
            param = req_obj['param']
        except KeyError:
            param = ''

        res_body = {
            'command': command,
            'system': system,
            'param': param
        }
        res_body['result'] = COMMAND_MAP[command](system, param)
        res_body['time'] = str(datetime.now())

        return res_body
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
예제 #5
0
def proc_dis_system(node, dummy):
    """Report the status columns stored for a node in the status table.

    Args:
        node: node name, or ``'all'``, forwarded to ``get_node_list``.
        dummy: not used by this function.

    Returns:
        A dict keyed by node name whose values hold the ``ping``, ``app``,
        ``cpu``, ``memory`` and ``disk`` columns, or ``{'Result': 'FAIL'}``
        when the lookup or the row unpacking raised.
    """
    columns = 'nodename, ping, app, cpu, memory, disk'

    try:
        rows = get_node_list(node, columns, DB.STATUS_TBL)

        status_by_node = {}
        for name, ping, app, cpu, memory, disk in rows:
            status_by_node[name] = dict(ping=ping, app=app, cpu=cpu,
                                        memory=memory, disk=disk)

        return status_by_node
    except:
        LOG.exception()
        return {'Result': 'FAIL'}
예제 #6
0
def proc_dis_node(node, param):
    """Return the stored node list or port list of ONOS node(s).

    Args:
        node: node name, or ``'all'``, forwarded to ``get_node_list``.
        param: ``'list'`` for the ``nodelist`` column or ``'port'`` for the
            ``port`` column.

    Returns:
        A dict keyed by node name holding the evaluated column value, or
        ``'FAIL'`` for rows whose value is ``'none'``. Returns
        ``{'fail': ...}`` when no row matched and ``{'Result': 'FAIL'}`` for
        an unsupported *param*, a failed lookup or any other error.
    """
    columns = {
        'list': 'nodename, nodelist',
        'port': 'nodename, port'
    }

    try:
        if param not in columns:
            # Previously this surfaced as a NameError on an unbound variable.
            LOG.error("Unsupported parameter \'%s\'", param)
            return {'Result': 'FAIL'}

        nodes_info = get_node_list(node, columns[param], DB.ONOS_TBL)

        if nodes_info is None:
            # get_node_list() already logged the failure.
            return {'Result': 'FAIL'}

        if len(nodes_info) == 0:
            return {'fail': 'This is not a command on the target system.'}

        res_result = dict()
        for nodename, value in nodes_info:
            if value == 'none':
                res_result[nodename] = 'FAIL'
            else:
                # SECURITY(review): eval() executes whatever text is stored
                # in the DB column. If the column only ever holds literal
                # structures, ast.literal_eval would be the safer choice.
                res_result[nodename] = eval(value)

        return res_result
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
예제 #7
0
def get_service_list():
    """Fetch the instance names known to the XOS REST server.

    Runs curl against ``<xos_rest_server>/api/core/instances/`` with the
    configured account and collects ``instance_name`` from every entry of
    the JSON reply.

    Returns:
        A list of instance names. The list is empty (or partial) when the
        command fails or the reply cannot be processed; failures are logged.
    """
    service_list = []

    try:
        url = CONF.xos()['xos_rest_server']
        account = CONF.xos()['xos_rest_account']

        cmd = 'curl -H "Accept: application/json; indent=4" -u ' + account + ' -X GET ' + url + '/api/core/instances/'
        result = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = result.communicate()

        if result.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            # Same (empty) list type as every other path, so callers can
            # always iterate the result or test membership against it.
            return service_list

        for instance_info in json.loads(output):
            name = instance_info['instance_name']

            # Lazy formatting: a non-string name no longer raises here.
            LOG.info('swarm_instance_name = %s', name)

            service_list.append(name)

    except Exception:
        LOG.exception()

    return service_list
예제 #8
0
def push_event(node_name, item, grade, pre_grade, reason, time):
    """Record an event in the history log and POST it to every subscriber.

    Subscribers are the ``(url, auth)`` rows of ``DB.REGI_SYS_TBL``; each
    one receives the same JSON body with its own ``Authorization`` header.
    Delivery is best effort: a subscriber that cannot be reached is logged
    and skipped so the remaining subscribers are still notified.

    Args:
        node_name: name of the system the event belongs to.
        item: monitored item that changed.
        grade: new grade of the item.
        pre_grade: grade the item had before.
        reason: description of the cause.
        time: time stamp sent with the event.
    """
    try:
        history_log.write_log('[%s][%s][%s][%s] %s', node_name, item, grade,
                              pre_grade, reason)

        sql = 'SELECT * FROM ' + DB.REGI_SYS_TBL

        with DB.connection() as conn:
            url_list = conn.cursor().execute(sql).fetchall()

        conn.close()

        if not url_list:
            return

        # The body is identical for every subscriber: serialise it once.
        req_body_json = json.dumps({
            'system': node_name,
            'item': item,
            'grade': grade,
            'pre_grade': pre_grade,
            'reason': reason,
            'time': time
        })

        for url, auth in url_list:
            header = {
                'Content-Type': 'application/json',
                'Authorization': str(auth)
            }

            try:
                requests.post(str(url),
                              headers=header,
                              data=req_body_json,
                              timeout=2)
            except Exception:
                # Push event does not respond; keep going with the others.
                LOG.error("Push event to %s failed", url)
    except Exception:
        LOG.exception()
예제 #9
0
def proc_dis_onos(node, param):
    """Return the stored application list or REST list of ONOS node(s).

    Args:
        node: node name, or ``'all'``, forwarded to ``get_node_list``.
        param: ``'app'`` for the ``applist`` column or ``'rest'`` for the
            ``weblist`` column.

    Returns:
        A dict keyed by node name holding the evaluated column value, or
        ``'FAIL'`` for rows whose value is ``'fail'`` or ``'none'``. Returns
        ``{'fail': ...}`` when no row matched and ``{'Result': 'FAIL'}`` for
        an unsupported *param*, a failed lookup or any other error.
    """
    columns = {
        'app': 'nodename, applist',
        'rest': 'nodename, weblist'
    }

    try:
        if param not in columns:
            # Previously this surfaced as a NameError on an unbound variable.
            LOG.error("Unsupported parameter \'%s\'", param)
            return {'Result': 'FAIL'}

        nodes_info = get_node_list(node, columns[param], DB.ONOS_TBL)

        if nodes_info is None:
            # get_node_list() already logged the failure.
            return {'Result': 'FAIL'}

        if len(nodes_info) == 0:
            return {'fail': 'This is not a command on the target system.'}

        res_result = dict()
        for nodename, app_rest_list in nodes_info:
            if app_rest_list in ('fail', 'none'):
                res_result[nodename] = 'FAIL'
            else:
                # SECURITY(review): eval() executes whatever text is stored
                # in the DB column. If the column only ever holds literal
                # structures, ast.literal_eval would be the safer choice.
                res_result[nodename] = eval(app_rest_list)

        return res_result
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
예제 #10
0
def proc_onos_cmd(node, cmd):
    """Run an ONOS command on a node over ssh and return its output.

    Args:
        node: node name forwarded to ``get_node_list``; also used as the key
            of the returned dict.
        cmd: command handed to ``SshCommand.onos_ssh_exec``.

    Returns:
        ``{node: output}`` on success; ``{'fail': ...}`` when the node is
        unknown, is not of type ``'ONOS'`` or the command produced nothing;
        ``{'Result': 'FAIL'}`` when the lookup failed or an error was raised.
    """
    try:
        nodes_info = get_node_list(node, 'ip_addr, type', DB.NODE_INFO_TBL)

        if nodes_info is None:
            # get_node_list() already logged the failure.
            return {'Result': 'FAIL'}

        if len(nodes_info) == 0:
            return {'fail': 'This is not a command on the target system.'}

        # Only the first matching row decides the outcome (the former loop
        # returned during its first iteration in every branch).
        ip, node_type = nodes_info[0]

        if node_type != 'ONOS':
            return {'fail': 'This is not a command on the target system.'}

        cmd_rt = SshCommand.onos_ssh_exec(ip, cmd)
        if cmd_rt is None:
            return {'fail': 'Invalid command.'}

        return {node: str(cmd_rt)}
    except Exception:
        LOG.exception()
        # Same failure shape as the other command handlers, not None.
        return {'Result': 'FAIL'}
0
def check_resource(conn, db_log, node_name, user_name, node_ip):
    """Measure CPU, memory and disk usage of a node and store them.

    The three readings are taken in value-only mode, converted to strings
    and written to the ``cpu``, ``memory`` and ``disk`` columns of
    ``DB.RESOURCE_TBL`` for *node_name*. A failed DB update is written to
    *db_log* and does not affect the return value.

    Returns:
        ``(cpu, mem, disk)`` as strings, or ``(-1, -1, -1)`` when taking the
        readings raised.
    """
    try:
        cpu = str(get_cpu_usage(user_name, node_ip, True))
        mem = str(get_mem_usage(user_name, node_ip, True))
        disk = str(get_disk_usage(user_name, node_ip, True))
    except:
        LOG.exception()
        return -1, -1, -1

    try:
        sql = ''.join([
            'UPDATE ', DB.RESOURCE_TBL,
            ' SET cpu = \'', cpu, '\',',
            ' memory = \'', mem, '\',',
            ' disk = \'', disk, '\'',
            ' WHERE nodename = \'', node_name, '\'',
        ])
        db_log.write_log('----- UPDATE RESOURCE INFO -----\n' + sql)

        update_result = DB.sql_execute(sql, conn)
        if update_result != 'SUCCESS':
            db_log.write_log('[FAIL] RESOURCE DB Update Fail.')
    except:
        LOG.exception()

    return cpu, mem, disk
예제 #12
0
def send_response_traffic_test_old(cond, auth):
    """Run the (old) traffic test and POST its result to the requester.

    Args:
        cond: test conditions; ``'transaction_id'`` and ``'app_rest_url'``
            are read here, the whole mapping is passed to
            ``trace.traffic_test_old``.
        auth: accepted for interface compatibility; the POST is always
            authenticated with the configured ONOS ``rest_auth`` value.

    The result is delivered with curl. Delivery is best effort: a failing
    curl call is not reported to the caller.
    """
    trace_result_data = {}

    try:
        is_success, result = trace.traffic_test_old(cond)

        trace_result_data['result'] = 'SUCCESS' if is_success else 'FAIL'

        if result is not None:
            trace_result_data['traffic_test_result'] = result

        trace_result_data['transaction_id'] = cond['transaction_id']
        try:
            LOG.info('%s',
                     json.dumps(trace_result_data, sort_keys=True, indent=4))
        except Exception:
            # The pretty-printed copy is only for the log; ignore failures.
            pass

        req_body_json = json.dumps(trace_result_data)

        try:
            url = str(cond['app_rest_url'])

            # Argument list, no shell: the URL and the JSON body come from
            # the request and must never be interpreted as shell syntax
            # (a single quote in either used to break out of the quoting).
            cmd = [
                'curl', '-X', 'POST',
                '-u', CONF.onos()['rest_auth'],
                '-H', 'Content-Type: application/json',
                '-d', str(req_body_json),
                url
            ]
            LOG.error('%s', 'curl = ' + ' '.join(cmd))
            push = Popen(cmd, stdout=PIPE, stderr=PIPE)
            push.communicate()

            # A non-zero exit status means the push notification got no
            # response; that is tolerated on purpose.
        except Exception:
            LOG.exception()

    except Exception:
        LOG.exception()
예제 #13
0
 def get_content(self):
     """Read and decode the JSON body of the current request.

     Returns:
         The decoded JSON value, or ``False`` after a 400 response has been
         sent because the Content-Length header is missing/empty or the body
         could not be read and parsed.
     """
     content_length = self.headers.getheader('content-length')

     if not content_length:
         self.do_HEAD(400)
         # json.dumps, not str(): str() of a dict is Python repr, which is
         # not valid JSON for the 'application/json' response.
         self.wfile.write(json.dumps({"result": "FAIL", "fail_reason": "Bad Request, Content Length is 0\n"}))
         LOG.info('[TRACE REST-S] Received No Data from %s', self.client_address)
         return False

     try:
         receive_data = json.loads(self.rfile.read(int(content_length)))
         LOG.info('%s', '[Trace Conditions] \n' + json.dumps(receive_data, sort_keys=True, indent=4))
         return receive_data
     except Exception:
         LOG.exception()
         error_reason = 'Trace Request Json Data Parsing Error\n'
         self.do_HEAD(400)
         self.wfile.write(json.dumps({"result": "FAIL", "fail_reason": error_reason}))
         LOG.info('[TRACE] %s', error_reason)
         return False
예제 #14
0
def tperf_test_run(perf_conditions):
    """Run a traffic performance test and report the result over REST.

    Creates a server and a client test instance, runs the test between
    them, POSTs the JSON result to ``perf_conditions['app_rest_url']`` and
    finally deletes the test instances.

    Args:
        perf_conditions: mapping with the keys ``'server'``, ``'client'``,
            ``'test_options'``, ``'transaction_id'`` and ``'app_rest_url'``.

    Errors are logged, never raised.
    """
    tperf_result = dict()
    request_headers = {
        'Authorization': CONF.onos()['rest_auth'],
        'Accept': 'application/json',
        'Content-Type': 'application/json'
    }

    created = False
    server_vm = client_vm = client_floatingip = None

    try:
        # 1. create instance
        LOG.info("[T-perf server/client VM create] --- ")
        server_vm, client_vm, client_floatingip = traffic_test.create_instance(
            perf_conditions['server'], perf_conditions['client'])
        created = True

        # 2. run performance test
        if server_vm and client_vm:
            # list(...) keeps the "first address" lookup working when
            # dict.values() is a view rather than a list.
            server_addresses = list(server_vm.__dict__['addresses'].values())
            tperf_result = traffic_test.tperf_command_exec(
                server_addresses[0][0]['addr'],
                client_floatingip.ip, perf_conditions['test_options'])
        else:
            tperf_result.update({
                'result': 'FAIL',
                'fail_reason': 'Fail to create instance.'
            })

        tperf_result.update(
            {'transaction_id': perf_conditions['transaction_id']})

        LOG.info("[Traffic Performance Test] Return Result = %s",
                 json.dumps(tperf_result))

        # send tperf test result to ONOS
        response = requests.post(perf_conditions['app_rest_url'],
                                 data=str(json.dumps(tperf_result)),
                                 headers=request_headers)
        LOG.info("[Tperf Result Send] Response = %s %s", response.status_code,
                 response.reason)

    except Exception:
        LOG.exception()

    finally:
        # delete tperf test instance -- also when the test or the POST
        # failed, otherwise the test instances are left behind.
        if created:
            try:
                traffic_test.delete_test_instance(server_vm, client_vm,
                                                  client_floatingip)
            except Exception:
                LOG.exception()
예제 #15
0
def onos_ha_check(conn, db_log):
    """Collect HA proxy statistics and store them in the HA table.

    Downloads the statistics report from the configured HA proxy server with
    curl, parses it as CSV and groups the rows by proxy name (``pxname``).
    Rows of the ``stats`` proxy and ``BACKEND`` summary rows are skipped.
    The grouped result is written, as ``str(dict)``, to the ``stats`` column
    of ``DB.HA_TBL``.

    Args:
        conn: DB connection handed to ``DB.sql_execute``.
        db_log: logger receiving the SQL text and update failures.

    Returns:
        A dict mapping proxy name to a list of per-server dicts (``name``,
        ``req_count``, ``succ_count``, ``node_sts``), or ``None`` when the
        command failed or an error was raised.
    """
    try:
        stats_url = CONF.ha()['ha_proxy_server']
        account = CONF.ha()['ha_proxy_account']

        cmd = 'curl --user ' + account + ' --header \'Accept: text/html, application/xhtml+xml, image/jxr, */*\' \"' + stats_url + '\"'
        result = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = result.communicate()

        if result.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return None

        # The report's header line starts with '# '; strip it so the column
        # names are usable as DictReader keys.
        report_data = csv.DictReader(output.lstrip('# ').splitlines())

        dic_stat = dict()
        for row in report_data:
            if row['pxname'].strip() == 'stats' or \
                    row['svname'].strip() == 'BACKEND':
                continue

            dtl_list = {
                'name': row['svname'],
                'req_count': row['stot'],
                'succ_count': row['hrsp_2xx'],
                'node_sts': row['status']
            }

            # setdefault replaces the has_key()/create/append sequence.
            dic_stat.setdefault(row['pxname'], list()).append(dtl_list)

        try:
            str_dic_stat = str(dic_stat)

            sql = 'UPDATE ' + DB.HA_TBL + \
                  ' SET stats = \"' + str_dic_stat + '\"' + \
                  ' WHERE ha_key = \"' + 'HA' + '\"'
            db_log.write_log('----- UPDATE HA INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] HA DB Update Fail.')
        except Exception:
            LOG.exception()

        return dic_stat
    except Exception:
        LOG.exception()
        return None
예제 #16
0
def get_cpu_usage(username, node_ip, only_value=False):
    """Compute the CPU usage ratio of a remote node from /proc/stat.

    The aggregate ``cpu`` line is fetched over ssh; the ratio is
    ``(f1 + f3) * 100 / (f1 + f3 + f4)`` over the whitespace-split fields of
    that line (user, system and idle in the usual /proc/stat layout),
    rounded to two decimals. The ratio stays 0.0 when the line is missing
    or cannot be parsed.

    Args:
        username: ssh user.
        node_ip: address of the node.
        only_value: return just the number instead of the report dict.

    Returns:
        With *only_value*: the ratio as float, or ``-1`` when the command
        failed. Otherwise ``{'CPU': {'RATIO': ..., 'Description': ...}}``,
        or ``{'CPU': 'Command fail'}`` when the command failed. Any
        unexpected error yields ``-1``.
    """
    try:
        stat_line = SshCommand.ssh_exec(username, node_ip,
                                        'sudo grep \'cpu\\ \' /proc/stat')

        if stat_line is None:
            LOG.info("%s CPU check Fail", node_ip)
            return -1 if only_value else {'CPU': 'Command fail'}

        ratio = float()
        if 'cpu ' in stat_line:
            try:
                fields = stat_line.split()
                busy = float(fields[1]) + float(fields[3])
                ratio = busy * 100 / (busy + float(fields[4]))
            except:
                LOG.exception()

        ratio_text = format(ratio, '.2f')
        LOG.info(" > CPU : %s", str(ratio_text))

        if only_value:
            return float(ratio_text)

        return {
            'CPU': {
                'RATIO': float(ratio_text),
                'Description': stat_line
            }
        }
    except:
        LOG.exception()
        return -1
예제 #17
0
def get_mem_usage(username, node_ip, only_value=False):
    """Compute the memory usage ratio of a remote node from ``free``.

    The ``Mem`` line of ``free -t -m`` is fetched over ssh; the ratio is
    ``f2 * 100 / f1`` over the whitespace-split fields of that line (used
    and total in the usual ``free`` layout), rounded to two decimals. The
    ratio stays 0.0 when the line is missing or cannot be parsed.

    Args:
        username: ssh user.
        node_ip: address of the node.
        only_value: return just the number instead of the report dict.

    Returns:
        With *only_value*: the ratio as float, or ``-1`` when the command
        failed. Otherwise ``{'MEMORY': {'RATIO': ..., 'Description': ...}}``,
        or ``{'MEMORY': 'Command fail'}`` when the command failed. Any
        unexpected error yields ``-1``.
    """
    try:
        mem_line = SshCommand.ssh_exec(username, node_ip,
                                       'sudo free -t -m | grep Mem')

        if mem_line is None:
            LOG.info("%s Memory check Fail", node_ip)
            return -1 if only_value else {'MEMORY': 'Command fail'}

        ratio = float()
        if 'Mem' in mem_line:
            try:
                fields = mem_line.split()
                ratio = float(fields[2]) * 100 / float(fields[1])
            except:
                LOG.exception()

        ratio_text = format(ratio, '.2f')
        LOG.info(" > MEMORY : %s", str(ratio_text))

        if only_value:
            return float(ratio_text)

        return {
            'MEMORY': {
                'RATIO': float(ratio_text),
                'Description': mem_line
            }
        }
    except:
        LOG.exception()
        return -1
예제 #18
0
    def run(self):
        """Daemon main loop: initialise the DB, start REST, then monitor.

        The process exits with status 1 when the REST server cannot be
        started or when a monitoring cycle raises; in the latter case a
        'disconnect' event is pushed and the DB connection closed first.
        With a watchdog interval of 0 no monitoring runs and the method just
        sleeps forever.
        """
        # DB initiation
        DB.db_initiation()

        # Start RESTful server
        try:
            REST_SVR.rest_server_start()
        except:
            print('Rest Server failed to start')
            LOG.exception()
            sys.exit(1)

        # Periodic monitoring (this loop is only left through sys.exit)
        if CONF.watchdog()['interval'] != 0:
            LOG.info("--- Periodic Monitoring Start ---")

            conn = DB.connection()

            while True:
                try:
                    watchdog.periodic(conn)

                    time.sleep(CONF.watchdog()['interval'])
                except:
                    watchdog.push_event('sonawatcher', 'disconnect',
                                        'critical',
                                        'sonawatcher server shutdown',
                                        str(datetime.now()))
                    conn.close()
                    LOG.exception()
                    sys.exit(1)

        # Monitoring disabled: keep the process alive for the REST server.
        LOG.info("--- Not running periodic monitoring ---")
        while True:
            time.sleep(3600)
예제 #19
0
    def exit(self):
        """Stop the daemon by sending SIGTERM to its process group.

        The process id is read from ``PIDFILE``. When no such process exists
        any more, the stale pid file (``self.pidfile``) is removed instead.
        Errors are logged, never raised.
        """
        import errno

        try:
            # 'with' closes the pid file even when its content is not a
            # valid integer.
            with open(PIDFILE, 'r') as pf:
                pid = int(pf.read().strip())

            LOG.info("--- Daemon STOP [fail to check rest server] ---")

            try:
                LOG.info('PID = ' + str(pid))
                os.killpg(pid, SIGTERM)
            except OSError as err:
                # Check the error number rather than the message text,
                # which depends on platform and locale.
                if err.errno == errno.ESRCH:
                    if os.path.exists(self.pidfile):
                        os.remove(self.pidfile)
                else:
                    LOG.exception()
        except Exception:
            LOG.exception()
예제 #20
0
def find_swarm_manager():
    """Look up the swarm manager host via the XOS REST server.

    Runs curl against ``<xos_rest_server>/api/core/controllers/`` with the
    configured account and takes the ``auth_url`` of the first controller in
    the JSON reply; the text before the first ``':'`` is the result.

    Returns:
        The host string, or ``''`` when the command fails, the reply holds
        no controller or an error was raised (errors are logged).
    """
    manager_host = ''

    try:
        url = CONF.xos()['xos_rest_server']
        account = CONF.xos()['xos_rest_account']

        cmd = ''.join([
            'curl -H "Accept: application/json; indent=4" -u ', account,
            ' -X GET ', url, '/api/core/controllers/',
        ])
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = proc.communicate()

        if proc.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return ''

        # Only the first controller entry is looked at.
        for controller_info in json.loads(output):
            auth_url = controller_info['auth_url']

            LOG.info('swarm_manager_auth_url = ' + auth_url)

            manager_host = str(auth_url).split(':')[0]
            break
    except:
        LOG.exception()

    return manager_host
예제 #21
0
def swarm_node_check(conn, db_log, node_name, username, node_ip,
                     swarm_manager):
    """Check Docker swarm node health and store the node list in the DB.

    Runs ``docker node ls`` on the swarm manager (through an ssh command
    that is itself executed via ``SshCommand.ssh_exec`` on *node_ip*),
    builds one entry per output line and writes ``str(node_list)`` to the
    ``node`` column of ``DB.SWARM_TBL`` for *node_name*.

    Returns:
        ``(node_status, fail_reason)``. The status is ``'ok'``, ``'nok'``
        (a node is down, the leader is missing or not ready/active, or
        parsing raised) or ``'fail'`` (the command returned nothing or an
        unexpected error was raised). ``fail_reason`` is a list of strings.
    """
    node_status = 'ok'
    node_list = []
    fail_reason = []

    try:
        cmd = 'ssh root@' + swarm_manager + ' \"sudo docker node ls\"'
        node_rt = SshCommand.ssh_exec(username, node_ip, cmd)

        if node_rt is not None:
            try:
                leader_flag = False
                for line in node_rt.splitlines():
                    line = line.decode('utf-8')

                    # Drop the '*' marker and collapse runs of whitespace so
                    # the columns can be split on single spaces.
                    line = " ".join(line.replace('*', '').split())
                    tmp = line.split(' ')

                    # Skip the header row.
                    if line.startswith('ID'):
                        continue

                    if 'Leader' in line:
                        node_json = {
                            'hostname': tmp[1],
                            'status': tmp[2],
                            'availability': tmp[3],
                            'manager': tmp[4]
                        }
                        leader_flag = True

                        # The leader must be both Ready and Active.
                        if not ('Ready' in line and 'Active' in line):
                            node_status = 'nok'
                            fail_reason.append(tmp[1] + ' node is not ready.')
                    else:
                        node_json = {
                            'hostname': tmp[1],
                            'status': tmp[2],
                            'availability': tmp[3],
                            'manager': ''
                        }

                    if 'Down' in line:
                        node_status = 'nok'
                        fail_reason.append(tmp[1] + ' node is down.')

                    node_list.append(node_json)

                if not leader_flag:
                    node_status = 'nok'
                    fail_reason.append('swarm leader node does not exist.')
            except:
                # A line with too few columns (IndexError) ends up here.
                LOG.exception()
                node_status = 'nok'

        else:
            LOG.error("\'%s\' Swarm Node Check Error", node_ip)
            node_status = 'fail'

        # Store whatever was collected, even after a failed check.
        try:
            sql = 'UPDATE ' + DB.SWARM_TBL + \
                  ' SET node = \"' + str(node_list) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE SWARM NODE INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] SWARM NODE DB Update Fail.')
        except:
            LOG.exception()

    except:
        LOG.exception()
        node_status = 'fail'

    return node_status, fail_reason
예제 #22
0
def swarm_service_check(conn, db_log, node_name, username, node_ip,
                        swarm_manager):
    """Check Docker swarm services and store service/task lists in the DB.

    Runs ``docker service ls`` on the swarm manager (through an ssh command
    executed via ``SshCommand.ssh_exec`` on *node_ip*) and compares it with
    the names returned by ``get_service_list()``. Every listed service is
    recorded; those named by ``get_service_list()`` are flagged
    ``monitor_item: True``. ``docker service ps`` is then run for each of
    those names, and both lists are written, as ``str(list)``, to the
    ``service`` and ``ps`` columns of ``DB.SWARM_TBL`` for *node_name*.

    Returns:
        ``(service_status, fail_reason)``. The status is ``'ok'``, ``'nok'``
        (a monitored service is missing or its replica counts differ) or
        ``'fail'`` (command or parsing failure). ``fail_reason`` holds the
        service dicts with mismatching replicas and/or a message string for
        a missing service.
    """
    service_status = 'ok'
    service_list = []
    ps_list = []
    fail_reason = []

    try:
        cmd = 'ssh root@' + swarm_manager + ' \"sudo docker service ls\"'
        service_rt = SshCommand.ssh_exec(username, node_ip, cmd)

        instance_list = get_service_list()

        if service_rt is not None:
            try:
                # Pass 1: the services that are expected to exist.
                for svc in instance_list:
                    find_flag = False
                    for line in service_rt.splitlines():
                        line = line.decode('utf-8')

                        # Skip the header row.
                        if line.startswith('ID'):
                            continue

                        # Raises ValueError (handled below) unless the line
                        # has exactly five whitespace-separated columns.
                        id, name, mode, rep, img = line.split()

                        if svc == name:
                            find_flag = True
                            # 'rep' has the form '<left>/<right>'; the
                            # service is healthy when both sides are equal.
                            rep_tmp = rep.split('/')

                            if not (rep_tmp[0] == rep_tmp[1]):
                                service_status = 'nok'
                                svc_json = {
                                    'name': name,
                                    'mode': mode,
                                    'replicas': rep,
                                    'image': img,
                                    'status': 'nok',
                                    'monitor_item': True
                                }
                                fail_reason.append(svc_json)
                            else:
                                svc_json = {
                                    'name': name,
                                    'mode': mode,
                                    'replicas': rep,
                                    'image': img,
                                    'status': 'ok',
                                    'monitor_item': True
                                }

                            service_list.append(svc_json)

                    if not find_flag:
                        service_status = 'nok'
                        fail_reason.append('swarm ' + svc +
                                           ' service does not exist.')
                        # The first missing service stops the remaining
                        # expected services from being checked.
                        break

                # Pass 2: every other listed service is recorded for
                # information only and never changes service_status.
                for line in service_rt.splitlines():
                    line = line.decode('utf-8')

                    if line.startswith('ID'):
                        continue

                    id, name, mode, rep, img = line.split()

                    if name in instance_list:
                        continue

                    rep_tmp = rep.split('/')

                    if not (rep_tmp[0] == rep_tmp[1]):
                        svc_json = {
                            'name': name,
                            'mode': mode,
                            'replicas': rep,
                            'image': img,
                            'status': 'nok',
                            'monitor_item': False
                        }
                    else:
                        svc_json = {
                            'name': name,
                            'mode': mode,
                            'replicas': rep,
                            'image': img,
                            'status': 'ok',
                            'monitor_item': False
                        }

                    service_list.append(svc_json)

            except:
                LOG.exception()
                service_status = 'fail'

        else:
            LOG.error("\'%s\' Swarm Service Check Error", node_ip)
            service_status = 'fail'

        # Collect the task list of every expected service.
        for app in instance_list:
            cmd = 'ssh root@' + swarm_manager + ' \"sudo docker service ps ' + app + '\"'
            ps_rt = SshCommand.ssh_exec(username, node_ip, cmd)

            if ps_rt is not None:
                for line in ps_rt.splitlines():
                    line = line.decode('utf-8')

                    if line.startswith('ID'):
                        continue

                    # Remove the ' \_ ' marker so the columns line up.
                    line = line.replace(' \_ ', '')

                    line = " ".join(line.split())
                    tmp = line.split(' ')

                    ps_json = {
                        'name': tmp[1],
                        'image': tmp[2],
                        'node': tmp[3],
                        'desired_state': tmp[4],
                        'current_state': tmp[5]
                    }
                    ps_list.append(ps_json)

            else:
                LOG.error("\'%s\' Swarm PS Check Error", node_ip)

        # Store whatever was collected, even after a failed check.
        try:
            sql = 'UPDATE ' + DB.SWARM_TBL + \
                  ' SET service = \"' + str(service_list) + '\",' + \
                  ' ps = \"' + str(ps_list) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE SWARM SERVICE/PS INFO -----\n' +
                             sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] SWARM SERVICE/PS DB Update Fail.')
        except:
            LOG.exception()

    except:
        LOG.exception()
        service_status = 'fail'

    return service_status, fail_reason
예제 #23
0
def periodic(conn, pre_stat, db_log):
    """Run one monitoring cycle over every registered node.

    For each node the ping check runs first; ONOS nodes that answer are then
    checked for cluster, device, link and application state. Every result is
    passed through ``alarm_event.process_event`` together with the grade
    currently stored in the event table, and the status table row of the
    node is updated. When problems existed before the cycle and none remain
    afterwards, a site-wide ``STATUS`` ``ok`` event is raised. Pending
    alarms are flushed at the end.

    Args:
        conn: DB connection used for all reads and updates.
        pre_stat: state object of the caller; returned unchanged.
        db_log: logger receiving the SQL text and update failures.

    Returns:
        *pre_stat*, on every path.
    """
    try:
        cur_info = {}

        try:
            node_list = cmd_proc.get_node_list('all', 'nodename, ip_addr, username, type, sub_type')
            if not node_list:
                LOG.info("Not Exist Node data ...")
                return pre_stat
        except Exception:
            LOG.exception()
            return pre_stat

        # Read cur alarm status
        sql = 'SELECT nodename, item, grade FROM ' + DB.EVENT_TBL

        db_log.write_log(sql)
        cur_grade = conn.cursor().execute(sql).fetchall()

        old_nok_count = 0
        for nodename, item, grade in cur_grade:
            cur_info.setdefault(nodename, {})[item] = grade
            if grade != 'ok':
                old_nok_count += 1

        new_nok_count = 0
        for node_name, node_ip, user_name, node_type, sub_type in node_list:
            # ONOS items default to 'fail' and are only overwritten when the
            # node answers ping and is an ONOS node.
            onos_cluster = 'fail'
            onos_device = 'fail'
            onos_link = 'fail'
            onos_app = 'fail'

            # ping check
            ping = net_check(node_ip)
            ping_reason = []
            if ping != 'ok':
                # This used to append to an undefined name ('reason'), so a
                # single failed ping aborted the whole cycle with NameError.
                ping_reason.append('ping check failed on ' + node_ip)
                new_nok_count += 1
            ping = alarm_event.process_event(conn, db_log, node_name, node_type, 'PING', cur_info[node_name]['PING'], ping, ping_reason)

            if ping == 'ok':
                if node_type.upper() == 'ONOS':
                    # check connection
                    onos_cluster, onos_device, onos_link, onos_app, cluster_reason, device_reason, link_reason, app_reason = chk_onos.onos_check(conn, db_log, node_name, node_ip)
                    onos_cluster = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_CLUSTER',
                                                             cur_info[node_name]['ONOS_CLUSTER'], onos_cluster, cluster_reason)
                    onos_device = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_DEVICE',
                                                            cur_info[node_name]['ONOS_DEVICE'], onos_device, device_reason)
                    onos_link = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_LINK',
                                                          cur_info[node_name]['ONOS_LINK'], onos_link, link_reason)
                    onos_app = alarm_event.process_event(conn, db_log, node_name, node_type, 'ONOS_APP',
                                                         cur_info[node_name]['ONOS_APP'], onos_app, app_reason)
                    if onos_cluster != 'ok':
                        new_nok_count += 1
                    if onos_device != 'ok':
                        new_nok_count += 1
                    if onos_link != 'ok':
                        new_nok_count += 1
                    if onos_app != 'ok':
                        new_nok_count += 1

            try:
                sql = 'UPDATE ' + DB.STATUS_TBL + \
                      ' SET' + \
                      ' PING = \'' + ping + '\',' + \
                      ' ONOS_CLUSTER = \'' + onos_cluster + '\',' + \
                      ' ONOS_DEVICE = \'' + onos_device + '\',' + \
                      ' ONOS_LINK = \'' + onos_link + '\',' + \
                      ' ONOS_APP = \'' + onos_app + '\',' + \
                      ' time = \'' + str(datetime.now()) + '\'' + \
                      ' WHERE nodename = \'' + node_name + '\''
                db_log.write_log('----- UPDATE TOTAL SYSTEM INFO -----\n' + sql)

                if DB.sql_execute(sql, conn) != 'SUCCESS':
                    db_log.write_log('[FAIL] TOTAL SYSTEM INFO DB Update Fail.')
            except Exception:
                LOG.exception()

            # Stay quiet while everything was ok; log details only when
            # problems existed before this cycle.
            if old_nok_count > 0:
                LOG.info('chk_onos[%s]: ping=%s cluster=%s device=%s link=%s app=%s' %
                         (node_name, ping, onos_cluster, onos_device, onos_link, onos_app))

        if old_nok_count > 0 and new_nok_count == 0:
            alarm_event.process_event(conn, db_log, 'ALL', 'SITE', 'STATUS', 'none', 'ok', [])

        # send all alarm messages pending
        alarm_event.flush_event_alarm()

    except Exception:
        LOG.exception()

    return pre_stat
예제 #24
0
    def do_GET(self):
        """Serve the REST API: health check plus the JSON endpoints.

        ``/alive-check`` is answered without authentication. Every other
        path requires ``self.authentication()`` to pass and a JSON request
        body sized by the Content-Length header, then dispatches on the path
        prefix: ``/command``, ``/regi``, ``/event_list`` or ``/unregi``.

        Responses: 401 when authentication fails, 400 when the body is
        missing or is not valid JSON, 404 for an unknown command or path,
        200 otherwise. Errors raised while handling an endpoint are logged.
        """
        # health check
        if self.path.startswith('/alive-check'):
            self.do_HEAD(200)
            self.wfile.write('ok\n')
            return

        if not self.authentication():
            self.do_HEAD(401)
            return

        content_length = self.headers.getheader('Content-Length')
        if not content_length:
            self.do_HEAD(400)
            self.wfile.write('Bad Request, Content Length is 0\n')
            return

        try:
            request_string = self.rfile.read(int(content_length))
            request_obj = json.loads(request_string)
        except ValueError:
            # A non-numeric Content-Length or a malformed body used to raise
            # out of the handler, leaving the client without any response.
            LOG.exception()
            self.do_HEAD(400)
            self.wfile.write('Bad Request, body is not valid JSON\n')
            return

        LOG.info('[REST-SERVER] CLIENT INFO = ' + str(self.client_address))
        LOG.info('[REST-SERVER] RECV BODY = \n' + json.dumps(request_obj, sort_keys=True, indent=4))

        if self.path.startswith('/command'):
            try:
                if command.exist_command(request_obj):
                    res_body = command.parse_command(request_obj)

                    self.do_HEAD(200)
                    self.wfile.write(json.dumps(res_body))

                    LOG.info('[REST-SERVER] RES BODY = \n%s',
                             json.dumps(res_body, sort_keys=True, indent=4))
                else:
                    self.do_HEAD(404)
                    self.wfile.write('command not found')

                    LOG.info('[REST-SERVER] ' + 'command not found')
            except Exception:
                LOG.exception()

        elif self.path.startswith('/regi'):
            try:
                self.do_HEAD(200)

                url = str(request_obj['url'])

                res_body = command.regi_url(url, self.headers.getheader('Authorization'))

                self.wfile.write(json.dumps(res_body))

                LOG.info('[REST-SERVER] RES BODY = \n%s',
                         json.dumps(res_body, sort_keys=True, indent=4))
            except Exception:
                LOG.exception()

        elif self.path.startswith('/event_list'):
            try:
                self.do_HEAD(200)

                url = str(request_obj['url'])

                res_body = command.get_event_list(url, self.headers.getheader('Authorization'))

                self.wfile.write(json.dumps(res_body))

                LOG.info('[REST-SERVER] RES BODY = \n%s',
                         json.dumps(res_body, sort_keys=True, indent=4))
            except Exception:
                LOG.exception()

        elif self.path.startswith('/unregi'):
            try:
                self.do_HEAD(200)

                url = str(request_obj['url'])

                res_body = command.unregi_url(url)

                self.wfile.write(json.dumps(res_body))

                LOG.info('[REST-SERVER] RES BODY = \n%s',
                         json.dumps(res_body, sort_keys=True, indent=4))
            except Exception:
                LOG.exception()

        else:
            self.do_HEAD(404)
            self.wfile.write(self.path + ' not found\n')

            LOG.info('[REST-SERVER] ' + self.path + ' not found')
예제 #25
0
 def do_HEAD(self, res_code):
     """Send the status line and the JSON content-type header.

     Any status other than 200 is also written to the log.
     """
     self.send_response(res_code)
     self.send_header('Content-type', 'application/json')
     self.end_headers()

     if res_code == 200:
         return

     LOG.info('[REST-SERVER] RESPONSE CODE = ' + str(res_code))
예제 #26
0
def rest_server_start():
    """Launch the REST server in a separate daemon process.

    Logs a start banner, then creates a process named ``rest_server`` whose
    target is the ``run`` callable, flags it as a daemon and starts it.
    """
    LOG.info("--- REST Server Start --- ")

    server_proc = multiprocess.Process(target=run, name='rest_server')
    # The daemon flag has to be set before start() is called.
    server_proc.daemon = True
    server_proc.start()
예제 #27
0
            if pid > 0:
                # exit from second parent
                sys.exit(0)
        except OSError, e:
            sys.stderr.write("fork #2 failed: %d (%s)\n" %
                             (e.errno, e.strerror))
            sys.exit(1)

        # redirect standard file descriptors
        si = file(self.stdin, 'r')
        so = file(self.stdout, 'a+')
        se = file(self.stderr, 'a+', 0)

        pid = str(os.getpid())

        LOG.info("--- Daemon START ---")
        sys.stderr.write("\nstarted with pid %s\n" % pid)
        sys.stderr.flush()

        if self.pidfile:
            file(self.pidfile, 'w+').write("%s\n" % pid)

        atexit.register(self.delpid)
        os.dup2(si.fileno(), sys.stdin.fileno())
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())

    # delete pid file when parent process kill
    def delpid(self):
        try:
            os.remove(self.pidfile)
예제 #28
0
    def run(self):
        """Main body of the daemon.

        Steps, in order:
          1. Configure the DB log (``db.log``) from ``CONF.base()``.
          2. Initialise the database via ``DB.db_initiation``.
          3. Start the REST server; on any error print/log it and call
             ``self.exit()``.
          4. If the watchdog interval is 0, idle forever in one-hour sleeps.
          5. Otherwise loop: probe the REST server's ``/alive-check`` URL with
             curl (up to three attempts), run ``watchdog.periodic`` and sleep
             for the configured interval. When the third probe fails, a
             ``SONAWATCHER_DISCONNECT`` event is pushed, the DB connection is
             closed, ``self.exit()`` is called and the loop ends.
        """
        db_log = USER_LOG()
        rotate_time = CONF.base()['log_rotate_time']
        backup_count = CONF.base()['log_backup_count']
        db_log.set_log('db.log', rotate_time, backup_count)

        # Previous monitoring state, threaded through watchdog.periodic().
        pre_stat = {}

        # DB initiation
        DB.db_initiation(db_log)

        # Start RESTful server
        try:
            REST_SVR.rest_server_start()
        except:
            print('Rest Server failed to start')
            LOG.exception()
            self.exit()

        # Periodic monitoring disabled: keep the process alive and do nothing.
        if CONF.watchdog()['interval'] == 0:
            LOG.info("--- Not running periodic monitoring ---")
            while True:
                time.sleep(3600)

        LOG.info("--- Periodic Monitoring Start ---")
        history_log.write_log("--- Event History Start ---")

        conn = DB.connection()

        rest_server_down = False
        while True:
            try:
                # check rest server (at most three probes per cycle)
                for attempt in range(1, 4):
                    try:
                        host_ip = socket.gethostbyname(socket.gethostname())
                        port = str(CONF.rest()['rest_server_port'])
                        url = 'http://' + host_ip + ':' + port + '/alive-check'

                        cmd = 'curl -X GET \"' + url + '\"'
                        LOG.info('cmd = ' + cmd)
                        probe = Popen(cmd, stdout=PIPE, stderr=PIPE,
                                      shell=True)
                        # Wait for curl to finish; only the exit code is used.
                        probe.communicate()

                        if probe.returncode == 0:
                            break

                        LOG.info('REST SERVER CHECK FAIL [' + str(attempt) +
                                 ']')

                        if attempt == 3:
                            LOG.info('fail to check rest server.')
                            alarm_event.push_event(
                                'sonawatcher',
                                'SONAWATCHER_DISCONNECT', 'critical',
                                'normal',
                                'sonawatcher server shutdown',
                                str(datetime.now()))
                            conn.close()
                            rest_server_down = True
                            self.exit()
                            break

                    except:
                        # A failed probe attempt is logged; the next attempt
                        # (if any) still runs.
                        LOG.exception()

                if rest_server_down:
                    break

                pre_stat = watchdog.periodic(conn, pre_stat, db_log)

                time.sleep(CONF.watchdog()['interval'])
            except:
                # NOTE(review): the connection is closed here but the loop
                # keeps running with the same `conn` and without sleeping --
                # confirm whether a break/reconnect is intended.
                alarm_event.push_event('sonawatcher',
                                       'SONAWATCHER_DISCONNECT',
                                       'critical', 'normal',
                                       'sonawatcher server shutdown',
                                       str(datetime.now()))
                conn.close()
                LOG.exception()
예제 #29
0
def swarm_check(conn, db_log, node_name, user_name, node_ip):
    """Check Docker Swarm node and service health on a remote host.

    Runs ``docker node ls``, ``docker service ls`` and, for every app in
    ``CONF.swarm()['app_list']``, ``docker service ps <app>`` through
    ``SshCommand.ssh_exec``. The raw output of the three commands is written
    to the ``node``, ``service`` and ``ps`` columns of ``DB.SWARM_TBL`` for
    the row whose ``nodename`` equals ``node_name``.

    Args:
        conn: DB connection passed to ``DB.sql_execute``.
        db_log: logger object providing ``write_log``; receives the SQL text.
        node_name: value matched against the ``nodename`` column.
        user_name: user name passed to ``SshCommand.ssh_exec``.
        node_ip: host address passed to ``SshCommand.ssh_exec``.

    Returns:
        Tuple ``(ret_app, ret_node)``, each ``'ok'`` or ``'nok'``.
        ``ret_node`` is ``'nok'`` when ``docker node ls`` fails, no row
        contains ``Leader``, the leader row lacks ``Ready``/``Active``, a row
        contains ``Down``, or the output cannot be processed.
        ``ret_app`` is ``'nok'`` when ``docker service ls`` fails, a
        configured app is missing, its replica counts (``x/y``) differ, or
        the output cannot be processed.
    """
    def _decode_lines(raw):
        # Decode each line once so every later pass works on the same text.
        return [raw_line.decode('utf-8') for raw_line in raw.splitlines()]

    def _join_lines(lines):
        return ''.join(text + '\n' for text in lines)

    def _quote(text):
        # Double single quotes so command output cannot end the SQL literal.
        return text.replace('\'', '\'\'')

    str_node = ''
    str_service = ''

    ret_app = 'ok'
    ret_node = 'ok'

    node_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker node ls')

    if node_rt is not None:
        try:
            node_lines = _decode_lines(node_rt)
            # Keep the complete listing, including rows after an unhealthy one.
            str_node = _join_lines(node_lines)

            leader_flag = False
            for line in node_lines:
                if line.startswith('ID'):
                    continue

                if 'Leader' in line:
                    leader_flag = True

                    if not ('Ready' in line and 'Active' in line):
                        ret_node = 'nok'
                        break

                if 'Down' in line:
                    ret_node = 'nok'
                    break

            if not leader_flag:
                ret_node = 'nok'
        except Exception:
            LOG.exception()
            ret_node = 'nok'

    else:
        LOG.error("\'%s\' Swarm Node Check Error", node_ip)
        str_node = 'fail'
        # A command that could not run must not be reported as healthy
        # (mirrors the service branch below).
        ret_node = 'nok'

    service_rt = SshCommand.ssh_exec(user_name, node_ip,
                                     'sudo docker service ls')

    if service_rt is not None:
        try:
            service_lines = _decode_lines(service_rt)
            str_service = _join_lines(service_lines)

            # Split data rows once; the header row starts with 'ID'.
            rows = [line.split() for line in service_lines
                    if not line.startswith('ID')]

            for app in CONF.swarm()['app_list']:
                find_flag = False
                for fields in rows:
                    # Columns are ID NAME MODE REPLICAS ...; trailing columns
                    # (IMAGE, PORTS) may or may not be present, so only the
                    # first four are required. Blank rows are ignored.
                    if len(fields) < 4 or fields[1] != app:
                        continue

                    find_flag = True
                    counts = fields[3].split('/')

                    if len(counts) != 2 or counts[0] != counts[1]:
                        ret_app = 'nok'
                        break

                if not find_flag:
                    ret_app = 'nok'
                    break
        except Exception:
            LOG.exception()
            ret_app = 'nok'
    else:
        LOG.error("\'%s\' Swarm Service Check Error", node_ip)
        str_service = 'fail'
        ret_app = 'nok'

    ps_parts = []
    try:
        for app in CONF.swarm()['app_list']:
            ps_rt = SshCommand.ssh_exec(user_name, node_ip,
                                        'sudo docker service ps ' + app)

            ps_parts.append(' * ' + app + '\n\n')

            if ps_rt is not None:
                ps_parts.append(_join_lines(_decode_lines(ps_rt)))
            else:
                LOG.error("\'%s\' Swarm PS Check Error", node_ip)
                ps_parts.append('Command failure(' + app + ')\n')

            ps_parts.append('\n')
    except Exception:
        LOG.exception()

    # Whatever was collected before a failure is still stored.
    str_ps = ''.join(ps_parts)

    try:
        sql = 'UPDATE ' + DB.SWARM_TBL + \
              ' SET node = \'' + _quote(str_node) + '\',' + \
              ' service = \'' + _quote(str_service) + '\',' + \
              ' ps = \'' + _quote(str_ps) + '\'' + \
              ' WHERE nodename = \'' + _quote(node_name) + '\''
        db_log.write_log('----- UPDATE SWARM INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] SWARN DB Update Fail.')
    except Exception:
        LOG.exception()

    return ret_app, ret_node
예제 #30
0
def vrouter_check(conn, db_log, node_name, user_name, node_ip):
    """Check vRouter containers, ONOS apps and ONOS routes on a remote host.

    Runs ``docker ps`` through ``SshCommand.ssh_exec`` and records the state
    of every container named in ``CONF.openstack()['docker_list']``. If a
    container whose second column contains ``onos`` is found, its IP address
    is read from ``docker inspect`` and ``apps -a -s`` / ``routes`` are run
    against it through ``SshCommand.ssh_pexpect``. The collected lists are
    written to the ``docker``, ``onosApp`` and ``routingTable`` columns of
    ``DB.OPENSTACK_TBL`` for the row whose ``nodename`` equals ``node_name``.

    Args:
        conn: DB connection passed to ``DB.sql_execute``.
        db_log: logger object providing ``write_log``; receives the SQL text.
        node_name: value matched against the ``nodename`` column.
        user_name: user name passed to the SSH helpers.
        node_ip: host address passed to the SSH helpers.

    Returns:
        Tuple ``(ret_docker, fail_list)``. ``ret_docker`` is ``'ok'``,
        ``'nok'`` (a matched container is not ``Up`` or a configured ONOS app
        is not active) or ``'fail'`` (``docker ps`` failed or no ONOS
        container was found). ``fail_list`` holds the
        ``{'name', 'status', 'type'}`` dicts of the items reported ``'nok'``.
    """
    def _quote(value):
        # Double embedded double quotes so they cannot end the quoted value.
        return str(value).replace('\"', '\"\"')

    ret_docker = 'ok'

    docker_list = []
    fail_list = []

    onos_id = ''

    docker_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker ps')

    if docker_rt is not None:
        try:
            # NOTE(review): a configured container that matches no line of
            # the output is neither listed nor flagged -- confirm intended.
            for docker in CONF.openstack()['docker_list']:
                for line in docker_rt.splitlines():
                    if line.startswith('CONTAINER'):
                        continue

                    tmp_line = line.split()

                    if ' ' + docker in line:
                        status = 'ok' if 'Up' in line else 'nok'
                        docker_json = {'name': docker, 'status': status, 'type': 'docker'}

                        if status == 'nok':
                            fail_list.append(docker_json)
                            ret_docker = 'nok'

                        docker_list.append(docker_json)

                    # Blank or short rows have no second column to inspect.
                    if len(tmp_line) > 1 and 'onos' in tmp_line[1]:
                        onos_id = tmp_line[0]
        except Exception:
            LOG.exception()
    else:
        LOG.error("\'%s\' Vrouter Node Check Error", node_ip)
        ret_docker = 'fail'

    onos_app_list = []
    route_list = []

    if not onos_id == '':
        try:
            # get onos container ip
            onos_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker inspect ' + onos_id + ' | grep IPAddress')

            if onos_rt is not None:
                onos_ip = None
                for line in onos_rt.splitlines():
                    line = line.strip()
                    if line.startswith('\"IPAddress'):
                        tmp = line.split(':')
                        onos_ip = tmp[1].strip().replace('\"', '').replace(',', '')
                        break

                if onos_ip is None:
                    # Without an address the ONOS queries cannot be issued.
                    LOG.error("\'%s\' ONOS IP Address Check Error", node_ip)
                else:
                    app_list = SshCommand.ssh_pexpect(user_name, node_ip, onos_ip, 'apps -a -s')

                    app_active_list = list()
                    for line in app_list.splitlines():
                        # presumably ssh_pexpect's failure marker -- confirm
                        if line.startswith('fail'):
                            continue

                        # The app name is the first word of the third
                        # dot-separated piece; skip rows without one.
                        pieces = line.split(".")
                        if len(pieces) < 3:
                            continue

                        words = pieces[2].split()
                        if words:
                            app_active_list.append(words[0])

                    for app in CONF.openstack()['onos_vrouter_app_list']:
                        if app in app_active_list:
                            app_json = {'name': app, 'status': 'ok', 'type': 'onos_app'}
                        else:
                            app_json = {'name': app, 'status': 'nok', 'type': 'onos_app'}
                            fail_list.append(app_json)
                            ret_docker = 'nok'

                        onos_app_list.append(app_json)

                    str_route = SshCommand.ssh_pexpect(user_name, node_ip, onos_ip, 'routes')

                    for line in str_route.splitlines():
                        line = line.strip()

                        if line.startswith(('Table', 'Network', 'Total')):
                            continue

                        tmp = line.split()

                        # Blank rows and rows without a next hop carry no route.
                        if len(tmp) < 2 or tmp[0].startswith('fail'):
                            continue

                        route_json = {'network': tmp[0], 'next_hop': tmp[1]}
                        route_list.append(route_json)
        except Exception:
            LOG.exception()
    else:
        LOG.info('can not find onos_id.')
        ret_docker = 'fail'

    try:
        sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
              ' SET docker = \"' + _quote(docker_list) + '\",' + \
              ' onosApp = \"' + _quote(onos_app_list) + '\",' + \
              ' routingTable = \"' + _quote(route_list) + '\"' + \
              ' WHERE nodename = \'' + node_name + '\''
        db_log.write_log('----- UPDATE GATEWAY INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] GATEWAY DB Update Fail.')
    except Exception:
        LOG.exception()

    return ret_docker, fail_list