def get_service_list():
    """Query the XOS REST API for core instances and return their names.

    Runs curl against ``<xos_rest_server>/api/core/instances/`` using the
    configured account and collects each ``instance_name``.

    Returns:
        list of instance-name strings; '' (empty string) when the curl
        command itself fails (historical behavior, kept for callers);
        whatever was collected so far on any other error.
    """
    service_list = []
    try:
        url = CONF.xos()['xos_rest_server']
        account = CONF.xos()['xos_rest_account']
        cmd = 'curl -H "Accept: application/json; indent=4" -u ' + account \
              + ' -X GET ' + url + '/api/core/instances/'
        result = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = result.communicate()
        if result.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return ''
        instance_array = json.loads(output)
        for instance_info in instance_array:
            name = instance_info['instance_name']
            LOG.info('swarm_instance_name = ' + name)
            service_list.append(name)
    except Exception:
        # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt
        # still propagate, and a message is supplied (stdlib
        # Logger.exception requires one)
        LOG.exception('get_service_list failed')
    return service_list
def xos_status_check(conn, db_log, node_name):
    """Check XOS component health via the XOS REST API.

    Fetches ``/api/core/xoses/`` and parses each object's
    ``backend_status`` (format "<code> - <description>"; code '0' means
    healthy).  The aggregated list is stringified and written to
    DB.XOS_TBL for *node_name*.

    Returns:
        (xos_status, fail_reason): xos_status is 'ok', 'nok' or 'fail'
        (unexpected error); fail_reason lists the nok entries, or None
        when the curl command fails.
    """
    xos_status = 'ok'
    xos_list = []
    fail_reason = []
    try:
        url = CONF.xos()['xos_rest_server']
        account = CONF.xos()['xos_rest_account']
        cmd = 'curl -H "Accept: application/json; indent=4" -u ' + account + ' -X GET ' + url + '/api/core/xoses/'
        result = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = result.communicate()
        if result.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return 'fail', None
        xos_array = json.loads(output)
        for xos_info in xos_array:
            backend_status = xos_info['backend_status']
            LOG.info('xos_status_backend_status = ' + backend_status)
            # backend_status format: "<code> - <description>"
            tmp = str(backend_status).split('-')
            if tmp[0].strip() == '0':
                status = 'ok'
            else:
                status = 'nok'
            xos_json = {
                'name': xos_info['name'],
                'status': status,
                'description': tmp[1].strip()
            }
            xos_list.append(xos_json)
            if status == 'nok':
                xos_status = 'nok'
                fail_reason.append(xos_json)
        try:
            # NOTE(review): SQL built by string concatenation, values are
            # not escaped/parameterized
            sql = 'UPDATE ' + DB.XOS_TBL + \
                  ' SET xos_status = \"' + str(xos_list) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE XOS STATUS INFO -----\n' + sql)
            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] XOS STATUS DB Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        xos_status = 'fail'
    return xos_status, fail_reason
def onos_ha_check(conn, db_log):
    """Fetch the HAProxy stats CSV and summarize per-service status.

    Groups data rows by proxy name (``pxname``) into
    {svc_type: [detail, ...]}, stores the stringified dict in DB.HA_TBL
    (row keyed by ha_key 'HA'), and returns it.

    Returns:
        dict of per-service stats, or None when the curl fails or an
        unexpected error occurs.
    """
    try:
        stats_url = CONF.ha()['ha_proxy_server']
        account = CONF.ha()['ha_proxy_account']
        cmd = 'curl --user ' + account + ' --header \'Accept: text/html, application/xhtml+xml, image/jxr, */*\' \"' + stats_url + '\"'
        result = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = result.communicate()
        if result.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return None
        else:
            # HAProxy prefixes the CSV header with "# "; strip it so
            # DictReader sees clean field names
            report_data = csv.DictReader(output.lstrip('# ').splitlines())
            dic_stat = dict()
            for row in report_data:
                # skip HAProxy's own stats frontend and the per-proxy
                # BACKEND summary rows
                if row['pxname'].strip() == 'stats' or row['svname'].strip(
                ) == 'BACKEND':
                    continue
                dtl_list = {
                    'name': row['svname'],
                    'req_count': row['stot'],
                    'succ_count': row['hrsp_2xx'],
                    'node_sts': row['status']
                }
                svc_type = row['pxname']
                # dict.has_key: this module targets Python 2
                if (dic_stat.has_key(svc_type)):
                    dic_stat[svc_type].append(dtl_list)
                else:
                    dic_stat[svc_type] = list()
                    dic_stat[svc_type].append(dtl_list)
            try:
                str_dic_stat = str(dic_stat)
                sql = 'UPDATE ' + DB.HA_TBL + \
                      ' SET stats = \"' + str_dic_stat + '\"' + \
                      ' WHERE ha_key = \"' + 'HA' + '\"'
                db_log.write_log('----- UPDATE HA INFO -----\n' + sql)
                if DB.sql_execute(sql, conn) != 'SUCCESS':
                    db_log.write_log('[FAIL] HA DB Update Fail.')
            except:
                LOG.exception()
            return dic_stat
    except:
        LOG.exception()
        return None
def occur_event(conn, node_name, item, pre_value, cur_value):
    """Record a grade transition for (*node_name*, *item*) and push it.

    Updates the matching row in DB.EVENT_TBL with the new grade, a
    "<old> -> <new>" description and the current timestamp, then notifies
    listeners via push_event().
    """
    event_time = str(datetime.now())
    change_desc = pre_value + ' -> ' + cur_value
    sql = ("UPDATE %s SET grade = '%s', desc = '%s', time = '%s'"
           " WHERE nodename = '%s' and item = '%s'"
           % (DB.EVENT_TBL, cur_value, change_desc, event_time,
              node_name, item))
    LOG.info('Update alarm info = ' + sql)
    if DB.sql_execute(sql, conn) != 'SUCCESS':
        LOG.error('DB Update Fail.')
    push_event(node_name, item, cur_value, change_desc, event_time)
def onos_app_check(node):
    """Return 'ok' if every app in CONF.onos()['app_list'] is active on *node*.

    Runs ``apps -a -s`` over SSH and extracts the app token from each
    "org.onosproject.<app> ..." line.

    NOTE(review): this name is redefined later in the module by
    ``onos_app_check(conn, db_log, node_name, node_ip)``, which shadows
    this one — confirm which definition callers expect.

    Returns:
        'ok' when all configured apps are active, 'nok' on any failure.
    """
    app_rt = SshCommand.onos_ssh_exec(node, 'apps -a -s')
    if app_rt is None:
        LOG.error("\'%s\' Application Check Error", node)
        return 'nok'
    # line format: "org.onosproject.<app> <version> ..." -> keep <app>
    app_active_list = [line.split(".")[2].split()[0]
                       for line in app_rt.splitlines()]
    if set(CONF.onos()['app_list']).issubset(app_active_list):
        return 'ok'
    # previously this error branch was duplicated in two else clauses
    LOG.error("\'%s\' Application Check Error", node)
    return 'nok'
def send_response_traffic_test_old(cond, auth):
    """Run the legacy traffic test described by *cond* and POST the result
    back to the requester's ``app_rest_url`` via curl.

    Best-effort: every step is wrapped so a failed notification never
    propagates to the caller.
    """
    trace_result_data = {}
    try:
        is_success, result = trace.traffic_test_old(cond)
        if is_success:
            trace_result_data['result'] = 'SUCCESS'
        else:
            trace_result_data['result'] = 'FAIL'
            # trace_result_data['fail_reason'] = 'The source ip does not exist.'
        if result != None:
            trace_result_data['traffic_test_result'] = result
        trace_result_data['transaction_id'] = cond['transaction_id']
        try:
            LOG.info('%s',
                     json.dumps(trace_result_data, sort_keys=True, indent=4))
        except:
            pass
        req_body_json = json.dumps(trace_result_data)
        try:
            url = str(cond['app_rest_url'])
            #requests.post(str(url), headers=header, data=req_body_json, timeout=2)
            # NOTE(review): the stripped Basic-auth token below is computed
            # but never used — the curl uses CONF.onos()['rest_auth'] instead;
            # confirm intent
            if str(auth).startswith('Basic '):
                auth = str(auth).split(' ')[1]
            cmd = 'curl -X POST -u \'' + CONF.onos(
            )['rest_auth'] + '\' -H \'Content-Type: application/json\' -d \'' + str(
                req_body_json) + '\' ' + url
            LOG.error('%s', 'curl = ' + cmd)
            result = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
            result.communicate()
            if result.returncode != 0:
                # Push noti does not respond
                pass
        except:
            LOG.exception()
            pass
    except:
        LOG.exception()
def net_check(node):
    """Ping *node* once and report reachability.

    Returns:
        'ok' when the ping succeeds, 'nok' when it fails, and None when
        the configured watchdog method is not 'ping' (the original code
        fell off the end and returned None implicitly; made explicit
        here — behavior unchanged).
    """
    if CONF.watchdog()['method'] != 'ping':
        return None
    timeout = CONF.watchdog()['timeout']
    if sys.platform == 'darwin':
        # macOS ping takes -W in milliseconds, Linux in seconds
        timeout = timeout * 1000
    cmd = 'ping -c1 -W%d -n %s' % (timeout, node)
    result = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    output, error = result.communicate()
    if result.returncode != 0:
        LOG.error("\'%s\' Network Check Error(%d) ", node, result.returncode)
        return 'nok'
    return 'ok'
def find_swarm_manager():
    """Return the swarm manager hostname derived from the XOS controllers API.

    Fetches ``/api/core/controllers/`` and takes the host part (before
    the first ':') of the first controller's ``auth_url``.

    Returns:
        hostname string, or '' when the curl fails, no controller is
        found, or any other error occurs.
    """
    hostname = ''
    try:
        url = CONF.xos()['xos_rest_server']
        account = CONF.xos()['xos_rest_account']
        cmd = 'curl -H "Accept: application/json; indent=4" -u ' + account \
              + ' -X GET ' + url + '/api/core/controllers/'
        result = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = result.communicate()
        if result.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return ''
        controller_array = json.loads(output)
        for controller_info in controller_array:
            auth_url = controller_info['auth_url']
            LOG.info('swarm_manager_auth_url = ' + auth_url)
            # only the first controller is considered
            hostname = str(auth_url).split(':')[0]
            break
    except Exception:
        # was a bare "except:"; narrowed and given a message (stdlib
        # Logger.exception requires one)
        LOG.exception('find_swarm_manager failed')
    return hostname
def vrouter_check(conn, db_log, node_name, user_name, node_ip):
    """Check vRouter containers, ONOS apps and routes on a gateway node.

    Inspects ``docker ps`` output for each configured container, then (if
    an onos container is found) resolves its IP, verifies the configured
    vrouter ONOS apps are active and collects the routing table.  Results
    are persisted into DB.OPENSTACK_TBL for *node_name*.

    Returns:
        (status, fail_list): status is 'ok', 'nok' (a check failed) or
        'fail' (SSH failed / no onos container found).
    """
    ret_docker = 'ok'
    docker_list = []
    fail_list = []
    onos_id = ''
    docker_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker ps')
    if docker_rt is not None:
        try:
            for docker in CONF.openstack()['docker_list']:
                for line in docker_rt.splitlines():
                    if line.startswith('CONTAINER'):
                        continue
                    tmp_line = line.split()
                    if ' ' + docker in line:
                        if not 'Up' in line:
                            docker_json = {'name': docker, 'status': 'nok', 'type': 'docker'}
                            fail_list.append(docker_json)
                            ret_docker = 'nok'
                        else:
                            docker_json = {'name': docker, 'status': 'ok', 'type': 'docker'}
                        docker_list.append(docker_json)
                        # remember the container id of the onos image for
                        # the app/route checks below
                        if 'onos' in tmp_line[1]:
                            onos_id = tmp_line[0]
        except:
            LOG.exception()
    else:
        LOG.error("\'%s\' Vrouter Node Check Error", node_ip)
        ret_docker = 'fail'
    onos_app_list = []
    route_list = []
    if not onos_id == '':
        try:
            # get onos container ip
            onos_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker inspect ' + onos_id + ' | grep IPAddress')
            if onos_rt is not None:
                for line in onos_rt.splitlines():
                    line = line.strip()
                    if line.startswith('\"IPAddress'):
                        tmp = line.split(':')
                        onos_ip = tmp[1].strip().replace('\"', '').replace(',', '')
                        break
            app_list = SshCommand.ssh_pexpect(user_name, node_ip, onos_ip, 'apps -a -s')
            app_active_list = list()
            for line in app_list.splitlines():
                if line.startswith('fail'):
                    continue
                # "org.onosproject.<app> ..." -> keep <app>
                app_active_list.append(line.split(".")[2].split()[0])
            for app in CONF.openstack()['onos_vrouter_app_list']:
                if app in app_active_list:
                    app_json = {'name': app, 'status': 'ok', 'type': 'onos_app'}
                else:
                    app_json = {'name': app, 'status': 'nok', 'type': 'onos_app'}
                    fail_list.append(app_json)
                    ret_docker = 'nok'
                onos_app_list.append(app_json)
            # collect the vRouter routing table ("routes" CLI output)
            str_route = SshCommand.ssh_pexpect(user_name, node_ip, onos_ip, 'routes')
            for line in str_route.splitlines():
                line = line.strip()
                # skip header/summary lines
                if (line.startswith('Table') or line.startswith('Network')
                        or line.startswith('Total')):
                    continue
                new_line = " ".join(line.split())
                if new_line.startswith('fail'):
                    continue
                tmp = new_line.split(' ')
                route_json = {'network': tmp[0], 'next_hop': tmp[1]}
                route_list.append(route_json)
        except:
            LOG.exception()
    else:
        LOG.info('can not find onos_id.')
        ret_docker = 'fail'
    try:
        sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
              ' SET docker = \"' + str(docker_list) + '\",' + \
              ' onosApp = \"' + str(onos_app_list) + '\",' + \
              ' routingTable = \"' + str(route_list) + '\"' + \
              ' WHERE nodename = \'' + node_name + '\''
        db_log.write_log('----- UPDATE GATEWAY INFO -----\n' + sql)
        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] GATEWAY DB Update Fail.')
    except:
        LOG.exception()
    return ret_docker, fail_list
def onos_check(conn, db_log, node_name, node_ip):
    # called on each ONOS node in NODE_INFO_TBL
    """Check an ONOS node's cluster members, devices, links and apps.

    Queries the ONOS REST API (cluster/devices/links/applications),
    compares against the configured expectations in CONF.onos(), and
    writes the collected lists to DB.ONOS_TBL for *node_name*.

    Returns:
        (node_status, device_status, link_status, app_status,
         node_fail_reason, device_fail_reason, link_fail_reason,
         app_fail_reason) — statuses are 'ok', 'nok' or 'fail'.
    """
    try:
        # check cluster nodes
        node_list = []
        node_status = 'ok'
        node_fail_reason = []
        ret, rsp = onos_api_req(node_ip, 'onos/v1/cluster')
        # NOTE(review): unlike devices/apps below there is no else branch
        # here — when rsp is None, node_status stays 'ok'; confirm intent
        if rsp is not None:
            try:
                node_tbl = dict()
                for node in rsp['nodes']:
                    node_tbl[node['ip']] = node
                # configured entries look like "<id>:<ip>"
                for onos_node in CONF.onos()['list']:
                    if len(onos_node.split(':')) != 2:
                        continue
                    id = onos_node.split(':')[0]
                    ip = onos_node.split(':')[1]
                    if id is '' or ip is '':
                        continue
                    if ip in node_tbl:
                        node = node_tbl[ip]
                        node['id'] = id
                        node['monitor_item'] = True
                        if node['status'] != 'READY':
                            node_status = 'nok'
                            node_fail_reason.append('Node ' + id + ' DOWN')
                        node_tbl.pop(ip)
                    else:
                        node = {
                            'id': id,
                            'ip': ip,
                            'status': 'nok',
                            'monitor_item': True
                        }
                        node_status = 'nok'
                        node_fail_reason.append('Node ' + id + ' DOWN')
                    node_list.append(node)
                # remaining cluster members are not in the monitored config
                for node in node_tbl.values():
                    node['monitor_item'] = False
                    node_list.append(node)
            except:
                LOG.exception()
                LOG.error("\'%s\' ONOS Check Error(nodes)", node_ip)
                node_status = 'fail'
        # check devices
        device_list = []
        device_status = 'ok'
        device_fail_reason = []
        ret, rsp = onos_api_req(node_ip, 'onos/v1/devices')
        if rsp is not None:
            try:
                device_tbl = dict()
                for device in rsp['devices']:
                    # normalize to the 16-hex-digit OpenFlow DPID form
                    device['id'] = 'of:' + device['chassisId'].rjust(16, '0')
                    device_tbl[device['id']] = device
                for id in CONF.onos()['device_list']:
                    if id is '':
                        continue  # no config
                    if id in device_tbl:
                        device = device_tbl[id]
                        device['monitor_item'] = True
                        if not device['available']:
                            device_status = 'nok'
                            device_fail_reason.append('Device ' + id + ' DOWN')
                        device_tbl.pop(id)
                    else:
                        device = {
                            'id': id,
                            'available': False,
                            'channelId': '-',
                            'name': '-',
                            'role': '-',
                            'monitor_item': True
                        }
                        device_status = 'nok'
                        device_fail_reason.append('Device ' + id + ' DOWN')
                    device_list.append(device)
                for device in device_tbl.values():
                    device['monitor_item'] = False
                    device_list.append(device)
            except:
                LOG.exception()
                LOG.error("\'%s\' ONOS Check Error(devices)", node_ip)
                device_status = 'fail'
        else:
            LOG.error("\'%s\' ONOS Check Error(devices)", node_ip)
            device_status = 'fail'
        # check links
        link_list = []
        link_status = 'ok'
        link_fail_reason = []
        ret, rsp = onos_api_req(node_ip, 'onos/v1/links')
        if rsp is not None:
            try:
                link_tbl = dict()
                for link in rsp['links']:
                    # flatten endpoints to "<device>/<port>" and key the
                    # table by "<src>-<dst>"
                    link['src'] = link['src']['device'] + '/' + link['src'][
                        'port']
                    link['dst'] = link['dst']['device'] + '/' + link['dst'][
                        'port']
                    link_tbl[link['src'] + '-' + link['dst']] = link
                for id in CONF.onos()['link_list']:
                    if id is '':
                        continue
                    if len(id.split('-')) != 2:
                        link = {
                            'src': id,
                            'dst': '(invalid_link_config)',
                            'expected': 'false',
                            'state': '-',
                            'type': "-",
                            'monitor_item': True
                        }
                        link_status = 'nok'
                        link_fail_reason.append(
                            'Link ' + id + ' is configed as INVALID ID FORMAT')
                        link_list.append(link)
                        continue
                    if id in link_tbl:
                        link = link_tbl[id]
                        link['monitor_item'] = True
                        if link['state'] != 'ACTIVE':
                            link_status = 'nok'
                            link_fail_reason.append('Link ' + id + ' DOWN')
                        link_list.append(link)
                        link_tbl.pop(id)
                    else:
                        link = {
                            'src': id.split('-')[0],
                            'dst': id.split('-')[1],
                            'expected': 'false',
                            'state': '-',
                            'type': "-",
                            'monitor_item': True
                        }
                        link_status = 'nok'
                        link_fail_reason.append('Link ' + id + ' DOWN')
                        link_list.append(link)
                    # links are directed; also check the reverse direction
                    rev_id = id.split('-')[1] + '-' + id.split('-')[0]
                    if rev_id in link_tbl:
                        link = link_tbl[rev_id]
                        link['monitor_item'] = True
                        if link['state'] != 'ACTIVE':
                            link_status = 'nok'
                            # NOTE(review): missing space after 'Link' here
                            link_fail_reason.append('Link' + rev_id + ' DOWN')
                        link_list.append(link)
                        link_tbl.pop(rev_id)
                    else:
                        link = {
                            'src': rev_id.split('-')[0],
                            'dst': rev_id.split('-')[1],
                            'expected': 'false',
                            'state': '-',
                            'type': "-",
                            'monitor_item': True
                        }
                        link_status = 'nok'
                        link_fail_reason.append('Link ' + rev_id + ' DOWN')
                        link_list.append(link)
                for link in link_tbl.values():
                    link['monitor_item'] = False
                    link_list.append(link)
            except:
                LOG.exception()
                LOG.error("\'%s\' ONOS Check Error(links)", node_ip)
                link_status = 'fail'
        # check apps
        app_list = []
        app_status = 'ok'
        app_fail_reason = []
        ret, rsp = onos_api_req(node_ip, 'onos/v1/applications')
        if rsp is not None:
            try:
                active_app_list = []
                for app_rsp in rsp['applications']:
                    if app_rsp['state'] == 'ACTIVE':
                        active_app_list.append(app_rsp['name'].replace(
                            'org.onosproject.', ''))
                for app in CONF.onos()['app_list']:
                    if app in active_app_list:
                        app_json = {
                            'name': app,
                            'status': 'ok',
                            'monitor_item': True
                        }
                        active_app_list.remove(app)
                    else:
                        app_json = {
                            'name': app,
                            'status': 'nok',
                            'monitor_item': True
                        }
                        app_status = 'nok'
                        app_fail_reason.append(app_json)
                    app_list.append(app_json)
                # leftover active apps are not monitored items
                for app in active_app_list:
                    app_json = {
                        'name': app,
                        'status': 'ok',
                        'monitor_item': False
                    }
                    app_list.append(app_json)
            except:
                LOG.exception()
                LOG.error("\'%s\' ONOS Check Error(apps)", node_ip)
                app_status = 'fail'
        else:
            LOG.error("\'%s\' ONOS Check Error(apps)", node_ip)
            # NOTE(review): probably meant app_status = 'fail' here —
            # link_status is overwritten instead
            link_status = 'fail'
        # store to db
        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET ' + \
                  ' cluster = \"' + str(node_list) + '\",' \
                  ' device = \"' + str(device_list) + '\",' \
                  ' link = \"' + str(link_list) + '\",' \
                  ' app = \"' + str(app_list) + '\"' \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE ONOS CONNECTION INFO -----\n' + sql)
            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS CONNECTION DB Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        # NOTE(review): cluster_status is never returned — this was
        # probably meant to be node_status; if the failure happened before
        # node_status was assigned, the return below raises NameError
        cluster_status = 'fail'
        device_status = 'fail'
        link_status = 'fail'
        app_status = 'fail'
    return node_status, device_status, link_status, app_status, node_fail_reason, device_fail_reason, link_fail_reason, app_fail_reason
def swarm_service_check(conn, db_log, node_name, username, node_ip, swarm_manager):
    """Check docker swarm services against the XOS instance list.

    SSHes via *node_ip* to the swarm manager, runs ``docker service ls``
    and per-service ``docker service ps``, marks a service 'nok' when its
    replica counts differ (e.g. "2/3") or it is missing, and stores the
    service and ps lists in DB.SWARM_TBL for *node_name*.

    Returns:
        (service_status, fail_reason): status is 'ok', 'nok' or 'fail'.
    """
    service_status = 'ok'
    service_list = []
    ps_list = []
    fail_reason = []
    try:
        cmd = 'ssh root@' + swarm_manager + ' \"sudo docker service ls\"'
        service_rt = SshCommand.ssh_exec(username, node_ip, cmd)
        instance_list = get_service_list()
        if service_rt is not None:
            try:
                for svc in instance_list:
                    find_flag = False
                    for line in service_rt.splitlines():
                        line = line.decode('utf-8')
                        if line.startswith('ID'):
                            continue
                        # columns: ID NAME MODE REPLICAS IMAGE
                        id, name, mode, rep, img = line.split()
                        if svc == name:
                            find_flag = True
                            # REPLICAS is "<running>/<desired>"
                            rep_tmp = rep.split('/')
                            if not (rep_tmp[0] == rep_tmp[1]):
                                service_status = 'nok'
                                svc_json = {
                                    'name': name,
                                    'mode': mode,
                                    'replicas': rep,
                                    'image': img,
                                    'status': 'nok',
                                    'monitor_item': True
                                }
                                fail_reason.append(svc_json)
                            else:
                                svc_json = {
                                    'name': name,
                                    'mode': mode,
                                    'replicas': rep,
                                    'image': img,
                                    'status': 'ok',
                                    'monitor_item': True
                                }
                            service_list.append(svc_json)
                    if not find_flag:
                        service_status = 'nok'
                        fail_reason.append('swarm ' + svc + ' service does not exist.')
                        # NOTE(review): this break stops checking the
                        # remaining services after the first missing one —
                        # confirm intent
                        break
                # second pass: services present in swarm but not in the
                # XOS instance list (monitor_item False)
                for line in service_rt.splitlines():
                    line = line.decode('utf-8')
                    if line.startswith('ID'):
                        continue
                    id, name, mode, rep, img = line.split()
                    if name in instance_list:
                        continue
                    rep_tmp = rep.split('/')
                    if not (rep_tmp[0] == rep_tmp[1]):
                        svc_json = {
                            'name': name,
                            'mode': mode,
                            'replicas': rep,
                            'image': img,
                            'status': 'nok',
                            'monitor_item': False
                        }
                    else:
                        svc_json = {
                            'name': name,
                            'mode': mode,
                            'replicas': rep,
                            'image': img,
                            'status': 'ok',
                            'monitor_item': False
                        }
                    service_list.append(svc_json)
            except:
                LOG.exception()
                service_status = 'fail'
        else:
            LOG.error("\'%s\' Swarm Service Check Error", node_ip)
            service_status = 'fail'
        for app in instance_list:
            cmd = 'ssh root@' + swarm_manager + ' \"sudo docker service ps ' + app + '\"'
            ps_rt = SshCommand.ssh_exec(username, node_ip, cmd)
            if ps_rt is not None:
                for line in ps_rt.splitlines():
                    line = line.decode('utf-8')
                    if line.startswith('ID'):
                        continue
                    # drop the " \_ " continuation marker and collapse
                    # whitespace before splitting columns
                    line = line.replace(' \_ ', '')
                    line = " ".join(line.split())
                    tmp = line.split(' ')
                    ps_json = {
                        'name': tmp[1],
                        'image': tmp[2],
                        'node': tmp[3],
                        'desired_state': tmp[4],
                        'current_state': tmp[5]
                    }
                    ps_list.append(ps_json)
            else:
                LOG.error("\'%s\' Swarm PS Check Error", node_ip)
        try:
            sql = 'UPDATE ' + DB.SWARM_TBL + \
                  ' SET service = \"' + str(service_list) + '\",' + \
                  ' ps = \"' + str(ps_list) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE SWARM SERVICE/PS INFO -----\n' + sql)
            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] SWARM SERVICE/PS DB Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        service_status = 'fail'
    return service_status, fail_reason
def onos_app_check(conn, db_log, node_name, node_ip):
    """Check active ONOS applications on *node_ip* and persist the result.

    Runs ``apps -a -s`` over SSH, re-activates cpman when missing,
    compares the active set against CONF.onos()['app_list'] and writes
    the list to DB.ONOS_TBL (applist column) for *node_name*.

    NOTE(review): this redefines the earlier single-argument
    ``onos_app_check(node)`` in this module, shadowing it for later code.

    Returns:
        (status, fail_reason): status is 'ok', 'nok' or 'fail'.
    """
    try:
        app_rt = SshCommand.onos_ssh_exec(node_ip, 'apps -a -s')
        status = 'ok'
        app_active_list = list()
        app_list = []
        fail_reason = []
        if app_rt is not None:
            for line in app_rt.splitlines():
                # "org.onosproject.<app> ..." -> keep <app>
                app_active_list.append(line.split(".")[2].split()[0])
            if not 'cpman' in app_active_list:
                # activate cpman
                LOG.info('Cpman does not exist. Activate cpman')
                SshCommand.onos_ssh_exec(node_ip,
                                         'app activate org.onosproject.cpman')
            for app in CONF.onos()['app_list']:
                if app in app_active_list:
                    app_json = {
                        'name': app,
                        'status': 'ok',
                        'monitor_item': True
                    }
                    app_active_list.remove(app)
                else:
                    status = 'nok'
                    app_json = {
                        'name': app,
                        'status': 'nok',
                        'monitor_item': True
                    }
                    fail_reason.append(app_json)
                app_list.append(app_json)
            # leftover active apps are recorded but not monitored
            for app in app_active_list:
                app_json = {'name': app, 'status': 'ok', 'monitor_item': False}
                app_list.append(app_json)
        else:
            LOG.error("\'%s\' ONOS Application Check Error", node_ip)
            status = 'fail'
            app_list = 'fail'
        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET applist = \"' + str(app_list) + '\"' +\
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE ONOS APP INFO -----\n' + sql)
            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS APP DB Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        status = 'fail'
    return status, fail_reason
def onos_node_check(conn, db_log, node_name, node_ip):
    """Check OpenStack compute/gateway nodes as reported by ONOS.

    Parses ``openstack-nodes`` CLI output, requires each configured node
    to be present with COMPLETE state, runs ``openstack-node-check`` per
    host for port status, updates DB.OPENSTACK_TBL per matching node and
    writes nodelist/port to DB.ONOS_TBL for *node_name*.

    Returns:
        (node_status, fail_reason): node_status is 'ok', 'nok' or 'fail'.
    """
    try:
        node_rt = SshCommand.onos_ssh_exec(node_ip, 'openstack-nodes')
        node_list = []
        port_list = []
        fail_reason = []
        ip_list = []
        node_status = 'ok'
        if node_rt is not None:
            # configured entries look like "<name>:<ip>"; keep the ip part
            for ip in CONF.openstack()['compute_list'] + CONF.openstack(
            )['gateway_list']:
                ip = str(ip).split(':')[1]
                find_flag = False
                for line in node_rt.splitlines():
                    if (not (line.startswith('Total')
                             or line.startswith('Hostname'))
                            ) and ' ' + ip + ' ' in line:
                        find_flag = True
                        fail_flag = False
                        new_line = " ".join(line.split())
                        tmp = new_line.split(' ')
                        host_name = tmp[0]
                        node_type = tmp[1]
                        of_id = tmp[2]
                        if not 'COMPLETE' in line:
                            node_status = 'nok'
                            fail_flag = True
                        try:
                            sql = 'SELECT nodename FROM ' + DB.NODE_INFO_TBL + ' WHERE ip_addr = \'' + ip + '\''
                            openstack_nodename = conn.cursor().execute(
                                sql).fetchone()[0]
                            # column layout shifts by one when an extra
                            # of: id column is present
                            if tmp[3].startswith('of:'):
                                manage_ip = tmp[4]
                                data_ip = tmp[5]
                                state = tmp[6]
                            else:
                                manage_ip = tmp[3]
                                data_ip = tmp[4]
                                state = tmp[5]
                            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                                  ' SET data_ip = \'' + data_ip + '\',' + \
                                  ' hostname = \'' + host_name + '\',' + \
                                  ' of_id = \'' + of_id + '\'' + \
                                  ' WHERE nodename = \'' + openstack_nodename + '\''
                            db_log.write_log(
                                '----- UPDATE OPENSTACK INFO -----\n' + sql)
                            if DB.sql_execute(sql, conn) != 'SUCCESS':
                                db_log.write_log(
                                    '[FAIL] OPENSTACK DATA IP Update Fail.')
                        except:
                            # NOTE(review): if this block fails before the
                            # column parse, manage_ip/data_ip/state are
                            # unbound when used below
                            LOG.exception()
                        port_rt = SshCommand.onos_ssh_exec(
                            node_ip, 'openstack-node-check ' + host_name)
                        host_port_list = []
                        port_status = 'ok'
                        if port_rt is not None:
                            for port_line in port_rt.splitlines():
                                if port_line.startswith(
                                        '[') or port_line.strip() == '':
                                    continue
                                tmp = port_line.split(' ')
                                if not port_line.startswith('OK'):
                                    rest_json = {
                                        'port_name': tmp[1].split('=')[0],
                                        'status': 'nok'
                                    }
                                    fail_flag = True
                                    port_status = 'nok'
                                    node_status = 'nok'
                                else:
                                    rest_json = {
                                        'port_name': tmp[1].split('=')[0],
                                        'status': 'ok'
                                    }
                                host_port_list.append(rest_json)
                        else:
                            node_status = 'nok'
                            port_status = 'nok'
                        port_json = {
                            'hostname': host_name,
                            'port_list': host_port_list
                        }
                        port_list.append(port_json)
                        rest_json = {
                            'hostname': host_name,
                            'type': node_type,
                            'of_id': of_id,
                            'management_ip': manage_ip,
                            'data_ip': data_ip,
                            'state': state,
                            'port_status': port_status,
                            'monitor_item': True
                        }
                        node_list.append(rest_json)
                        ip_list.append(manage_ip)
                        if fail_flag:
                            fail_reason.append(rest_json)
                if not find_flag:
                    # configured node missing from ONOS output
                    rest_json = {
                        'hostname': '-',
                        'type': '-',
                        'of_id': '-',
                        'port_status': 'nok',
                        'management_ip': ip,
                        'data_ip': '-',
                        'state': 'NO_EXIST',
                        'monitor_item': True
                    }
                    node_list.append(rest_json)
                    node_status = 'nok'
                    fail_reason.append(rest_json)
            # second pass: nodes ONOS knows about that are not in the
            # monitored config (monitor_item False)
            for line in node_rt.splitlines():
                if not (line.startswith('Total')
                        or line.startswith('Hostname')):
                    new_line = " ".join(line.split())
                    tmp = new_line.split(' ')
                    if tmp[3].startswith('of:'):
                        manage_ip = tmp[4]
                        data_ip = tmp[5]
                        state = tmp[6]
                    else:
                        manage_ip = tmp[3]
                        data_ip = tmp[4]
                        state = tmp[5]
                    if not manage_ip in ip_list:
                        rest_json = {
                            'hostname': tmp[0],
                            'type': tmp[1],
                            'of_id': tmp[2],
                            'management_ip': manage_ip,
                            'data_ip': data_ip,
                            'state': state,
                            'monitor_item': False
                        }
                        node_list.append(rest_json)
        else:
            LOG.error("\'%s\' ONOS Node Check Error", node_ip)
            node_status = 'fail'
            node_list = 'fail'
        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET nodelist = \"' + str(node_list) + '\",' + \
                  ' port = \"' + str(port_list) + '\"' \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE ONOS NODE INFO -----\n' + sql)
            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS NODE Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        node_status = 'fail'
    return node_status, fail_reason
def onos_conn_check(conn, db_log, node_name, node_ip):
    """Check OpenFlow device connections and ONOS cluster membership.

    Parses the ``devices`` CLI output against switch ids from
    DB.OPENSTACK_TBL, and the ``nodes`` CLI output against ONOS nodes
    from DB.NODE_INFO_TBL (matching each node's data IP taken from its
    ``summary`` output).  Results are written to DB.ONOS_TBL
    (openflow, cluster) for *node_name*.

    Returns:
        (of_status, cluster_status, of_fail_reason, cluster_fail_reason)
        — statuses are 'ok', 'nok' or 'fail'.
    """
    try:
        device_rt = SshCommand.onos_ssh_exec(node_ip, 'devices')
        nodes_rt = SshCommand.onos_ssh_exec(node_ip, 'nodes')
        of_status = 'ok'
        of_list = []
        of_fail_reason = []
        cluster_list = []
        cluster_fail_reason = []
        find_list = []
        if device_rt is not None:
            try:
                sql = 'SELECT hostname, of_id FROM ' + DB.OPENSTACK_TBL
                nodes_info = conn.cursor().execute(sql).fetchall()
                for hostname, switch_id in nodes_info:
                    for line in device_rt.splitlines():
                        # device lines look like "id=of:...,available=..."
                        if line.startswith('id=of'):
                            find_list.append(switch_id)
                            of_id = line.split(',')[0].split('=')[1]
                            available = line.split(',')[1].split('=')[1]
                            if switch_id == of_id:
                                rest_json = parse_openflow(
                                    line, str(hostname), True)
                                if not available == 'true':
                                    of_status = 'nok'
                                    of_fail_reason.append(rest_json)
                                of_list.append(rest_json)
                # second pass: devices not matching any DB switch id
                for line in device_rt.splitlines():
                    if line.startswith('id=of'):
                        of_id = line.split(',')[0].split('=')[1]
                        if not of_id in find_list:
                            rest_json = parse_openflow(line, '', False)
                            of_list.append(rest_json)
            except:
                LOG.exception()
                LOG.error("\'%s\' Connection Check Error(devices)", node_ip)
                of_status = 'fail'
        else:
            LOG.error("\'%s\' Connection Check Error(devices)", node_ip)
            of_status = 'fail'
        cluster_status = 'ok'
        if nodes_rt is not None:
            try:
                sql = 'SELECT ip_addr FROM ' + DB.NODE_INFO_TBL + ' WHERE type = \'ONOS\''
                nodes_info = conn.cursor().execute(sql).fetchall()
                cluster_ip_list = list()
                for onos_ip in nodes_info:
                    find_flag = False
                    summary_rt = SshCommand.onos_ssh_exec(
                        onos_ip[0], 'summary')
                    if summary_rt is not None:
                        # "node=<data_ip>,..." -> the node's data IP
                        data_ip = str(summary_rt).split(',')[0].split('=')[1]
                        for line in nodes_rt.splitlines():
                            id = line.split(',')[0].split('=')[1]
                            address = line.split(',')[1].split('=')[1]
                            state = line.split(',')[2].split('=')[1].split(
                                ' ')[0]
                            if data_ip == address.split(':')[0]:
                                find_flag = True
                                cluster_ip_list.append(address)
                                rest_json = {
                                    'id': id,
                                    'address': address,
                                    'status': 'ok',
                                    'monitor_item': True
                                }
                                cluster_list.append(rest_json)
                                if not state == 'READY':
                                    cluster_status = 'nok'
                                    cluster_fail_reason.append(rest_json)
                        if not find_flag:
                            rest_json = {
                                'id': data_ip,
                                'address': '-',
                                'status': 'nok',
                                'monitor_item': True
                            }
                            cluster_list.append(rest_json)
                            cluster_status = 'nok'
                            cluster_fail_reason.append(rest_json)
                    else:
                        rest_json = {
                            'id': onos_ip,
                            'address': '-',
                            'status': 'nok',
                            'monitor_item': True
                        }
                        cluster_list.append(rest_json)
                # NOTE(review): summary_rt here is the value from the LAST
                # loop iteration above — confirm this guard is intended
                if summary_rt is not None:
                    for line in nodes_rt.splitlines():
                        id = line.split(',')[0].split('=')[1]
                        address = line.split(',')[1].split('=')[1]
                        state = line.split(',')[2].split('=')[1].split(' ')[0]
                        if not state == 'READY':
                            status = 'nok'
                        else:
                            status = 'ok'
                        if not address in cluster_ip_list:
                            rest_json = {
                                'id': id,
                                'address': address,
                                'status': status,
                                'monitor_item': True
                            }
                            cluster_list.append(rest_json)
            except:
                pass
        else:
            LOG.error("\'%s\' Connection Check Error(nodes)", node_ip)
            cluster_status = 'fail'
        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET openflow = \"' + str(of_list) + '\",' + \
                  ' cluster = \"' + str(cluster_list) + '\"' \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE ONOS CONNECTION INFO -----\n' + sql)
            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS CONNECTION DB Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        of_status = 'fail'
        cluster_status = 'fail'
    return of_status, cluster_status, of_fail_reason, cluster_fail_reason
def periodic(conn):
    """One periodic watchdog pass over every registered node.

    For each node: ping check, per-type app check (ONOS/XOS/Swarm/
    OpenStack), CPU/memory/disk usage collection; updates
    DB.RESOURCE_TBL and DB.STATUS_TBL, and raises events via
    occur_event() whenever a grade changed against DB.EVENT_TBL.
    """
    cur_info = {}
    LOG.info("Periodic checking...%s", str(CONF.watchdog()['check_system']))
    try:
        node_list = cmd_proc.get_node_list('all', 'nodename, ip_addr, username')
        if not node_list:
            LOG.info("Not Exist Node data ...")
            return
    except:
        LOG.exception()
        return
    # Read cur alarm status
    sql = 'SELECT nodename, item, grade FROM ' + DB.EVENT_TBL
    LOG.info(sql)
    cur_grade = conn.cursor().execute(sql).fetchall()
    # build {nodename: {item: grade}} for change detection below
    for nodename, item, grade in cur_grade:
        if not cur_info.has_key(nodename):
            cur_info[nodename] = {}
        cur_info[nodename][item] = grade
    for node_name, node_ip, user_name in node_list:
        ping = net_check(node_ip)
        app = 'fail'
        cpu = '-1'
        mem = '-1'
        disk = '-1'
        if ping == 'ok':
            # pick the app checker matching the node's role
            if node_ip in str(CONF.onos()['list']):
                app = onos_app_check(node_ip)
            elif node_ip in str(CONF.xos()['list']):
                app = xos_app_check(node_ip)
            elif node_ip in str(CONF.swarm()['list']):
                app = swarm_app_check(node_ip)
            elif node_ip in str(CONF.openstack()['list']):
                app = openstack_app_check(node_ip)
            cpu = str(resource.get_cpu_usage(user_name, node_ip, True))
            mem = str(resource.get_mem_usage(user_name, node_ip, True))
            disk = str(resource.get_disk_usage(user_name, node_ip, True))
        try:
            sql = 'UPDATE ' + DB.RESOURCE_TBL + \
                  ' SET cpu = \'' + cpu + '\',' + \
                  ' memory = \'' + mem + '\',' + \
                  ' disk = \'' + disk + '\'' \
                  ' WHERE nodename = \'' + node_name + '\''
            LOG.info('Update Resource info = ' + sql)
            if DB.sql_execute(sql, conn) != 'SUCCESS':
                LOG.error('DB Update Fail.')
        except:
            LOG.exception()
        # occur event (rest)
        # 1. ping check
        if cur_info[node_name]['ping'] != ping:
            occur_event(conn, node_name, 'ping', cur_info[node_name]['ping'],
                        ping)
        # 2. app check
        if cur_info[node_name]['app'] != app:
            occur_event(conn, node_name, 'app', cur_info[node_name]['app'],
                        app)
        # 3. resource check (CPU/MEM/DISK)
        cpu_grade = 'fail'
        if CONF.alarm().has_key('cpu'):
            cpu_grade = get_grade('cpu', cpu)
        if cur_info[node_name]['cpu'] != cpu_grade:
            occur_event(conn, node_name, 'cpu', cur_info[node_name]['cpu'],
                        cpu_grade)
        mem_grade = 'fail'
        if CONF.alarm().has_key('memory'):
            mem_grade = get_grade('memory', mem)
        if cur_info[node_name]['memory'] != mem_grade:
            occur_event(conn, node_name, 'memory',
                        cur_info[node_name]['memory'], mem_grade)
        disk_grade = 'fail'
        if CONF.alarm().has_key('disk'):
            disk_grade = get_grade('disk', disk)
        if cur_info[node_name]['disk'] != disk_grade:
            occur_event(conn, node_name, 'disk', cur_info[node_name]['disk'],
                        disk_grade)
        try:
            sql = 'UPDATE ' + DB.STATUS_TBL + \
                  ' SET cpu = \'' + cpu_grade + '\',' + \
                  ' memory = \'' + mem_grade + '\',' + \
                  ' disk = \'' + disk_grade + '\',' + \
                  ' ping = \'' + ping + '\',' + \
                  ' app = \'' + app + '\',' + \
                  ' time = \'' + str(datetime.now()) + '\'' + \
                  ' WHERE nodename = \'' + node_name + '\''
            LOG.info('Update Status info = ' + sql)
            if DB.sql_execute(sql, conn) != 'SUCCESS':
                LOG.error('DB Update Fail.')
        except:
            LOG.exception()
def onos_rest_check(conn, db_log, node_name, node_ip):
    """Check ONOS web bundles via the Karaf ``web:list`` CLI.

    Each configured name in CONF.onos()['rest_list'] must appear with
    both 'Active' and 'Deployed' in its line; unmonitored bundles are
    recorded with monitor_item False.  The list is written to
    DB.ONOS_TBL (weblist) for *node_name*.

    Returns:
        (web_status, fail_reason): web_status is 'ok', 'nok' or 'fail'.
    """
    try:
        web_status = 'ok'
        web_list = []
        fail_reason = []
        web_rt = SshCommand.onos_ssh_exec(node_ip, 'web:list')
        if web_rt is not None:
            for web in CONF.onos()['rest_list']:
                for line in web_rt.splitlines():
                    # skip header and separator lines
                    if line.startswith('ID') or line.startswith('--'):
                        continue
                    if ' ' + web + ' ' in line:
                        if not ('Active' in line and 'Deployed' in line):
                            rest_json = {
                                'name': web,
                                'status': 'nok',
                                'monitor_item': True
                            }
                            fail_reason.append(rest_json)
                            web_status = 'nok'
                        else:
                            rest_json = {
                                'name': web,
                                'status': 'ok',
                                'monitor_item': True
                            }
                        web_list.append(rest_json)
            # second pass: bundles not in the monitored list
            for line in web_rt.splitlines():
                if line.startswith('ID') or line.startswith('--'):
                    continue
                # NOTE(review): name assumed to be the 11th
                # whitespace-separated column of web:list output
                name = " ".join(line.split()).split(' ')[10]
                if not name in CONF.onos()['rest_list']:
                    if not ('Active' in line and 'Deployed' in line):
                        rest_json = {
                            'name': name,
                            'status': 'nok',
                            'monitor_item': False
                        }
                    else:
                        rest_json = {
                            'name': name,
                            'status': 'ok',
                            'monitor_item': False
                        }
                    web_list.append(rest_json)
        else:
            LOG.error("\'%s\' ONOS Rest Check Error", node_ip)
            web_status = 'fail'
            web_list = 'fail'
        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET weblist = \"' + str(web_list) + '\"' +\
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE ONOS REST INFO -----\n' + sql)
            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS REST DB Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        web_status = 'fail'
    return web_status, fail_reason
def swarm_node_check(conn, db_log, node_name, username, node_ip, swarm_manager):
    """Check swarm cluster membership via ``docker node ls`` on the manager.

    Requires exactly one Leader line that is 'Ready'/'Active'; any 'Down'
    node or a missing leader makes the status 'nok'.  The node list is
    written to DB.SWARM_TBL for *node_name*.

    Returns:
        (node_status, fail_reason): node_status is 'ok', 'nok' or 'fail'.
    """
    node_status = 'ok'
    node_list = []
    fail_reason = []
    try:
        cmd = 'ssh root@' + swarm_manager + ' \"sudo docker node ls\"'
        node_rt = SshCommand.ssh_exec(username, node_ip, cmd)
        if node_rt is not None:
            try:
                leader_flag = False
                for line in node_rt.splitlines():
                    line = line.decode('utf-8')
                    # drop the '*' self-marker and collapse whitespace
                    line = " ".join(line.replace('*', '').split())
                    tmp = line.split(' ')
                    if line.startswith('ID'):
                        continue
                    if 'Leader' in line:
                        node_json = {
                            'hostname': tmp[1],
                            'status': tmp[2],
                            'availability': tmp[3],
                            'manager': tmp[4]
                        }
                        leader_flag = True
                        if not ('Ready' in line and 'Active' in line):
                            node_status = 'nok'
                            fail_reason.append(tmp[1] + ' node is not ready.')
                    else:
                        node_json = {
                            'hostname': tmp[1],
                            'status': tmp[2],
                            'availability': tmp[3],
                            'manager': ''
                        }
                        if 'Down' in line:
                            node_status = 'nok'
                            fail_reason.append(tmp[1] + ' node is down.')
                    node_list.append(node_json)
                if not leader_flag:
                    node_status = 'nok'
                    fail_reason.append('swarm leader node does not exist.')
            except:
                LOG.exception()
                node_status = 'nok'
        else:
            LOG.error("\'%s\' Swarm Node Check Error", node_ip)
            node_status = 'fail'
        try:
            sql = 'UPDATE ' + DB.SWARM_TBL + \
                  ' SET node = \"' + str(node_list) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE SWARM NODE INFO -----\n' + sql)
            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] SWARM NODE DB Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        node_status = 'fail'
    return node_status, fail_reason
def xos_sync_check(conn, db_log, node_name):
    """Check XOS synchronizer health via the diag API.

    Fetches ``/api/core/diags/``; an entry is ok when its backend_status
    code is '0' or '1' AND its ``backend_register.last_run`` is within
    the last 30 seconds.  The list is written to DB.XOS_TBL
    (synchronizer) for *node_name*.

    Returns:
        (swarm_sync, fail_reason): 'ok'/'nok'/'fail'; fail_reason is the
        nok entries, or None when the curl command fails.
    """
    swarm_sync = 'ok'
    sync_list = []
    fail_reason = []
    try:
        url = CONF.xos()['xos_rest_server']
        account = CONF.xos()['xos_rest_account']
        cmd = 'curl -H "Accept: application/json; indent=4" -u ' + account + ' -X GET ' + url + '/api/core/diags/'
        result = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        output, error = result.communicate()
        if result.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return 'fail', None
        sync_array = json.loads(output)
        for xos_info in sync_array:
            backend_status = xos_info['backend_status']
            LOG.info('xos_sync_backend_status = ' + backend_status)
            # backend_status format: "<code> - <description>"
            tmp = str(backend_status).split('-')
            if tmp[0].strip() in ['0', '1']:
                status = 'ok'
            else:
                status = 'nok'
            # check time
            # a synchronizer that has not run within 30s is considered nok
            last_time = json.loads(xos_info['backend_register'])['last_run']
            cur_time = time.time()
            interval = cur_time - last_time
            interval = int(interval)
            if interval >= 30:
                status = 'nok'
            xos_json = {
                'name': xos_info['name'],
                'status': status,
                'description': tmp[1].strip(),
                'last_run_interval': interval
            }
            sync_list.append(xos_json)
            if status == 'nok':
                swarm_sync = 'nok'
                fail_reason.append(xos_json)
        try:
            sql = 'UPDATE ' + DB.XOS_TBL + \
                  ' SET synchronizer = \"' + str(sync_list) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE SYNCHRONIZER INFO -----\n' + sql)
            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] SYNCHRONIZER DB Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        swarm_sync = 'fail'
    return swarm_sync, fail_reason
def swarm_check(conn, db_log, node_name, user_name, node_ip):
    """Collect raw swarm node/service/ps listings and store them in SWARM_TBL.

    Runs `docker node ls`, `docker service ls` and `docker service ps <app>`
    (for every app in CONF.swarm()['app_list']) on node_ip, keeps the raw
    text for the DB and derives two health flags.

    :param conn: DB connection handle passed through to DB.sql_execute
    :param db_log: log object exposing write_log()
    :param node_name: row key used in the SQL WHERE clause
    :param user_name: ssh user for SshCommand.ssh_exec
    :param node_ip: host the docker commands are executed on
    :return: tuple (ret_app, ret_node), each 'ok' or 'nok'
    """
    str_node = ''
    str_service = ''
    str_ps = ''
    ret_app = 'ok'
    ret_node = 'ok'

    # --- node health: every node Ready/Active, none Down, a Leader exists ---
    node_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker node ls')
    if node_rt is not None:
        try:
            leader_flag = False
            for line in node_rt.splitlines():
                line = line.decode('utf-8')
                str_node = str_node + line + '\n'
                if line.startswith('ID'):
                    continue
                if 'Leader' in line:
                    leader_flag = True
                    if not ('Ready' in line and 'Active' in line):
                        ret_node = 'nok'
                        break
                if 'Down' in line:
                    ret_node = 'nok'
                    break
            if not leader_flag:
                ret_node = 'nok'
        except Exception:
            # was a bare `except:` calling LOG.exception() without a message
            LOG.exception('swarm node listing parse failed')
            ret_node = 'nok'
    else:
        LOG.error("\'%s\' Swarm Node Check Error", node_ip)
        str_node = 'fail'

    # --- service health: each configured app exists with full replicas ---
    service_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker service ls')
    if service_rt is not None:
        try:
            for app in CONF.swarm()['app_list']:
                find_flag = False
                for line in service_rt.splitlines():
                    line = line.decode('utf-8')
                    if line.startswith('ID'):
                        continue
                    # columns: ID NAME MODE REPLICAS IMAGE [PORTS...].
                    # Index into the columns instead of a fixed 5-way unpack
                    # so extra trailing columns (newer docker adds PORTS)
                    # do not raise ValueError; also avoids shadowing id().
                    cols = line.split()
                    svc_name = cols[1]
                    replicas = cols[3]
                    if app == svc_name:
                        find_flag = True
                        # REPLICAS is "running/desired"
                        running, desired = replicas.split('/')
                        if running != desired:
                            ret_app = 'nok'
                        break
                if not find_flag:
                    ret_app = 'nok'
                    break
        except Exception:
            LOG.exception('swarm service listing parse failed')
            ret_app = 'nok'
        for line in service_rt.splitlines():
            line = line.decode('utf-8')
            str_service = str_service + line + '\n'
    else:
        LOG.error("\'%s\' Swarm Service Check Error", node_ip)
        str_service = 'fail'
        ret_app = 'nok'

    # --- per-app `docker service ps` output, captured verbatim for the DB ---
    try:
        for app in CONF.swarm()['app_list']:
            ps_rt = SshCommand.ssh_exec(user_name, node_ip,
                                        'sudo docker service ps ' + app)
            str_ps = str_ps + ' * ' + app + '\n\n'
            if ps_rt is not None:
                for line in ps_rt.splitlines():
                    line = line.decode('utf-8')
                    str_ps = str_ps + line + '\n'
            else:
                LOG.error("\'%s\' Swarm PS Check Error", node_ip)
                str_ps = str_ps + 'Command failure(' + app + ')\n'
            str_ps = str_ps + '\n'
    except Exception:
        LOG.exception('swarm ps collection failed')

    try:
        sql = 'UPDATE ' + DB.SWARM_TBL + \
              ' SET node = \'' + str_node + '\',' + \
              ' service = \'' + str_service + '\',' + \
              ' ps = \'' + str_ps + '\'' + \
              ' WHERE nodename = \'' + node_name + '\''
        db_log.write_log('----- UPDATE SWARM INFO -----\n' + sql)
        if DB.sql_execute(sql, conn) != 'SUCCESS':
            # fixed typo: was "SWARN"
            db_log.write_log('[FAIL] SWARM DB Update Fail.')
    except Exception:
        LOG.exception('swarm DB update failed')

    return ret_app, ret_node
def do_POST(self):
    """Route POST requests to the trace / traffic-test / tperf handlers.

    Paths handled: /trace_request, /traffictest_request, /tperf_request.
    Each branch validates its mandatory fields, then hands the real work
    to a daemonless background thread and replies 200 immediately.
    Responses: 401 on failed authentication, 400 on malformed input,
    404 on an unknown path.
    """

    def reply(status_code, body):
        # Single place for the repeated "status head + stringified body" pair.
        self.do_HEAD(status_code)
        self.wfile.write(str(body))

    if not self.authentication():
        reply(401, {"result": "FAIL"})
        return

    if self.path.startswith('/trace_request'):
        trace_mandatory_field = [
            'command', 'transaction_id', 'app_rest_url', 'matchingfields'
        ]
        matching_mandatory_field = ['source_ip', 'destination_ip']

        trace_condition_json = self.get_content()
        if not trace_condition_json:
            return
        # Check the top-level fields BEFORE dereferencing
        # trace_condition_json['matchingfields'] -- the original evaluated
        # the inner lookup first, so a missing 'matchingfields' key raised
        # KeyError instead of producing a 400 response.
        if (not all(x in dict(trace_condition_json).keys()
                    for x in trace_mandatory_field)) \
                or (not all(x in dict(trace_condition_json['matchingfields']).keys()
                            for x in matching_mandatory_field)):
            reply(400, {
                "result": "FAIL",
                "fail_reason": "Not Exist Mandatory Attribute\n"
            })
            return
        if (valid_IPv4(trace_condition_json['matchingfields']['source_ip']) == False) or \
                (valid_IPv4(trace_condition_json['matchingfields']['destination_ip']) == False):
            reply(400, {
                "result": "FAIL",
                "fail_reason": "Type of IP Address is wrong\n"
            })
            return
        # process trace, send noti
        process_thread = threading.Thread(
            target=send_response_trace_test,
            args=(trace_condition_json,
                  str(self.headers.getheader("Authorization"))))
        process_thread.daemon = False
        process_thread.start()
        reply(200, {"result": "SUCCESS"})

    # traffic test for exited VM
    elif self.path.startswith('/traffictest_request'):
        trace_mandatory_field = [
            'command', 'transaction_id', 'app_rest_url', 'traffic_test_list'
        ]
        test_mandatory_field = [
            'node', 'instance_id', 'vm_user_id', 'vm_user_password',
            'traffic_test_command'
        ]

        trace_condition_json = self.get_content()
        if not trace_condition_json:
            return
        if not all(x in dict(trace_condition_json).keys()
                   for x in trace_mandatory_field):
            reply(400, {
                "result": "FAIL",
                "fail_reason": "Not Exist Mandatory Attribute\n"
            })
            return
        for test in trace_condition_json['traffic_test_list']:
            if not all(x in dict(test).keys() for x in test_mandatory_field):
                reply(400, {
                    "result": "FAIL",
                    "fail_reason": "Not Exist Mandatory Attribute\n"
                })
                return
            for x in test_mandatory_field:
                if len(test[x]) == 0:
                    reply(400, {
                        "result": "FAIL",
                        "fail_reason": x + " condition empty\n"
                    })
                    return
        # process traffic test, send noti
        process_thread = threading.Thread(
            target=send_response_traffic_test_old,
            args=(trace_condition_json,
                  str(self.headers.getheader("Authorization"))))
        process_thread.daemon = False
        process_thread.start()
        reply(200, {"result": "SUCCESS"})

    # create VM and traffic test for exclusive performance VM
    elif self.path.startswith('/tperf_request'):
        perf_mandatory_field = [
            'transaction_id', 'app_rest_url', 'server', 'client',
            'test_options'
        ]

        # Check for an empty/missing body BEFORE calling dict() -- the
        # original did dict(self.get_content()) first, so a None body
        # raised TypeError instead of returning.
        content = self.get_content()
        if not content:
            return
        perf_condition = dict(content)
        if not all(x in perf_condition.keys() for x in perf_mandatory_field):
            LOG.error('[Data Check Fail] Mandatory Attribute is Wrong')
            reply(400, {
                "result": "FAIL",
                "fail_reason": "Not Exist Mandatory Attribute\n"
            })
            return
        # process traffic test, send noti
        process_thread = threading.Thread(
            target=tperf_test_run, args=(perf_condition, ))
        process_thread.daemon = False
        LOG.info("[Run Traffic Test Start] ---- ")
        process_thread.start()
        reply(200, {"result": "SUCCESS"})

    else:
        reply(404, {
            "result": "FAIL",
            "fail_reason": "Not Found path \"" + self.path + "\"\n"
        })