def _ovs_counter(field):
    """Return the integer after ``=`` in one ``name=value`` counter field."""
    return int(field.split('=')[1])


def _ovs_stat_fields(stat_line):
    """Split one ``dump-ports`` counter line on commas, reading ``?`` as ``0``."""
    return stat_line.replace('?', '0').split(',')


def rx_tx_check(user_name, node_ip):
    """Collect packet counters of the vxlan and patch ports of ``br-int``.

    Runs ``sudo ovs-ofctl show br-int`` over SSH to find the port numbers of
    the rows tagged ``(vxlan)`` and ``(patch-intg)``, then runs
    ``sudo ovs-ofctl dump-ports br-int <port>`` for each of them.  In the
    ``dump-ports`` output the second line is read as the rx counters and the
    third line as the tx counters; within a line, field 0 is the packet
    count, field 2 the drop count and field 3 the error count.

    Args:
        user_name: SSH login used on the node.
        node_ip: address of the node to query.

    Returns:
        A 4-tuple ``(rx_packet_cnt, tx_packet_cnt, err_dict,
        patch_tx_packet_cnt)``.  ``err_dict`` holds ``rx_drop``, ``rx_err``,
        ``tx_drop`` and ``tx_err`` for the vxlan port (it may be empty or
        partially filled when collection failed).  Counters that could not
        be collected are ``-1``.
    """
    # Bound before the try so that the error path can always return it.
    err_dict = dict()

    try:
        port_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo ovs-ofctl show br-int')
        if port_rt is None:
            return -1, -1, err_dict, -1

        vxlan_port = None
        patch_port = None
        for line in port_rt.splitlines():
            if '(vxlan)' in line:
                vxlan_port = line.split('(')[0].strip()
            elif '(patch-intg)' in line:
                patch_port = line.split('(')[0].strip()

        if vxlan_port is None:
            # Without a vxlan port there is nothing to dump; report failure
            # explicitly instead of relying on a NameError.
            LOG.error("'%s' vxlan port not found on br-int", node_ip)
            return -1, -1, err_dict, -1

        rx_packet_cnt = -1
        tx_packet_cnt = -1
        port_rt = SshCommand.ssh_exec(user_name, node_ip,
                                      'sudo ovs-ofctl dump-ports br-int ' + vxlan_port)
        if port_rt is not None:
            stat_lines = port_rt.splitlines()
            rx_fields = _ovs_stat_fields(stat_lines[1])
            tx_fields = _ovs_stat_fields(stat_lines[2])

            rx_packet_cnt = _ovs_counter(rx_fields[0])
            err_dict['rx_drop'] = _ovs_counter(rx_fields[2])
            err_dict['rx_err'] = _ovs_counter(rx_fields[3])

            tx_packet_cnt = _ovs_counter(tx_fields[0])
            err_dict['tx_drop'] = _ovs_counter(tx_fields[2])
            err_dict['tx_err'] = _ovs_counter(tx_fields[3])

        patch_tx_packet_cnt = -1
        # find patch port
        if patch_port is not None:
            port_rt = SshCommand.ssh_exec(user_name, node_ip,
                                          'sudo ovs-ofctl dump-ports br-int ' + patch_port)
            if port_rt is not None:
                stat_lines = port_rt.splitlines()
                patch_tx_packet_cnt = _ovs_counter(_ovs_stat_fields(stat_lines[2])[0])

        return rx_packet_cnt, tx_packet_cnt, err_dict, patch_tx_packet_cnt
    except Exception:
        LOG.exception()
        return -1, -1, err_dict, -1
def get_mem_usage(username, node_ip, only_value=False):
    """Report the memory usage of a node as a percentage.

    Runs ``free -t -m | grep Mem`` on the node over SSH and divides the
    third whitespace-separated token of the output by the second one.

    Args:
        username: SSH login used on the node.
        node_ip: address of the node to query.
        only_value: when true, return just the number instead of a dict.

    Returns:
        On command failure: ``-1`` if ``only_value`` is set, otherwise
        ``{'MEMORY': 'Command fail'}``.  Otherwise the ratio rounded to two
        decimals (``only_value``) or ``{'MEMORY': {'RATIO': ..,
        'Description': <raw output>}}``.  The ratio stays ``0.0`` when the
        output cannot be parsed.
    """
    output = SshCommand.ssh_exec(username, node_ip, 'free -t -m | grep Mem')
    usage = float()

    if output is None:
        LOG.info("%s Memory check Fail", node_ip)
        return -1 if only_value else {'MEMORY': 'Command fail'}

    if 'Mem' in output:
        LOG.info("cmd_rt %s", output)
        try:
            tokens = output.split()
            usage = float(tokens[2]) * 100 / float(tokens[1])
        except:
            # Parsing problems are logged; the ratio keeps its 0.0 default.
            LOG.exception()

    rounded = float(format(usage, '.2f'))
    result = {'MEMORY': {'RATIO': rounded, 'Description': output}}
    LOG.info(" Memory check ... %s", result)

    return rounded if only_value else result
def proc_dis_log(node, param):
    """Fetch the log display of every ONOS node selected by ``node``.

    ``param`` selects the ONOS CLI command: ``debug``, ``info`` and ``error``
    map to ``ld -l <LEVEL>``, ``exception`` maps to
    ``log:exception-display``; anything else falls back to plain ``ld``.

    Args:
        node: node selector handed to ``get_node_list``.
        param: log filter keyword.

    Returns:
        A dict mapping node name to the command output, or to ``'FAIL'``
        when the command returned nothing.  ``{'Result': 'FAIL'}`` when
        anything raised.
    """
    cmd = 'ld'
    try:
        keyword_to_cmd = (
            ('debug', 'ld -l DEBUG'),
            ('info', 'ld -l INFO'),
            ('error', 'ld -l ERROR'),
            ('exception', 'log:exception-display'),
        )
        for keyword, onos_cmd in keyword_to_cmd:
            if param == keyword:
                cmd = onos_cmd
                break

        rows = get_node_list(node, 'nodename, ip_addr, type')

        res_result = dict()
        for node_name, ip, node_type in rows:
            # Only ONOS nodes understand the log commands.
            if node_type.upper() != 'ONOS':
                continue
            log_crt = SshCommand.onos_ssh_exec(ip, cmd)
            res_result[node_name] = 'FAIL' if log_crt is None else log_crt

        return res_result
    except:
        LOG.exception()
        return {'Result': 'FAIL'}
def get_cpu_usage(username, node_ip, only_value=False):
    """Report the CPU usage of a node as a percentage.

    Greps the aggregate ``cpu`` line from ``/proc/stat`` over SSH and
    computes ``(f1 + f3) * 100 / (f1 + f3 + f4)`` from its whitespace
    separated tokens (presumably user, system and idle time in the usual
    ``/proc/stat`` layout -- not verified here).

    Args:
        username: SSH login used on the node.
        node_ip: address of the node to query.
        only_value: when true, return just the number instead of a dict.

    Returns:
        On command failure: ``-1`` if ``only_value`` is set, otherwise
        ``{'CPU': 'Command fail'}``.  Otherwise the ratio rounded to two
        decimals (``only_value``) or ``{'CPU': {'RATIO': ..,
        'Description': <raw output>}}``.  The ratio stays ``0.0`` when the
        output cannot be parsed.
    """
    # Same command text as before, spelled with an explicit backslash.
    output = SshCommand.ssh_exec(username, node_ip, "grep 'cpu\\ ' /proc/stat")
    usage = float()

    if output is None:
        LOG.info("%s CPU check Fail", node_ip)
        return -1 if only_value else {'CPU': 'Command fail'}

    if 'cpu ' in output:
        LOG.info("cmd_rt: %s", output)
        try:
            tokens = output.split()
            busy = float(tokens[1]) + float(tokens[3])
            usage = busy * 100 / (float(tokens[1]) + float(tokens[3]) + float(tokens[4]))
        except:
            # Parsing problems are logged; the ratio keeps its 0.0 default.
            LOG.exception()

    rounded = float(format(usage, '.2f'))
    result = {'CPU': {'RATIO': rounded, 'Description': output}}
    LOG.info(" CPU check ... %s", result)

    return rounded if only_value else result
def get_disk_usage(username, node_ip, only_value=False):
    """Report the usage of the root filesystem of a node as a percentage.

    Runs ``df -h / | grep -v Filesystem`` on the node over SSH and reads the
    second-to-last whitespace-separated token of the output with ``%``
    removed.

    Args:
        username: SSH login used on the node.
        node_ip: address of the node to query.
        only_value: when true, return just the number instead of a dict.

    Returns:
        On command failure: ``-1`` if ``only_value`` is set, otherwise
        ``{'DISK': 'Command fail'}``.  Otherwise the ratio rounded to two
        decimals (``only_value``) or ``{'DISK': {'RATIO': ..,
        'Description': <raw output>}}``.  The ratio stays ``0.0`` when the
        output cannot be parsed.
    """
    cmd = 'df -h / | grep -v Filesystem'
    cmd_rt = SshCommand.ssh_exec(username, node_ip, cmd)
    ratio = float()

    if cmd_rt is None:
        # The message used to be misspelled "Diksk".
        LOG.info("%s Disk check Fail", node_ip)
        if only_value:
            return -1
        return {'DISK': 'Command fail'}

    if '/' in cmd_rt:
        LOG.info("cmd_rt %s", cmd_rt)
        try:
            ratio = float(cmd_rt.split()[-2].replace('%', ''))
        except Exception:
            # Parsing problems are logged; the ratio keeps its 0.0 default.
            LOG.exception()

    rounded = float(format(ratio, '.2f'))
    result = {'DISK': {'RATIO': rounded, 'Description': cmd_rt}}
    LOG.info(" Disk check ... %s", result)

    if only_value:
        return rounded
    return result
def onos_app_check(node):
    """Check that every configured ONOS application is active on ``node``.

    Runs ``apps -a -s`` through the ONOS CLI and takes, from every output
    line, the first word of the third dot-separated component as the
    application name.  The result is ``'ok'`` only if all names in
    ``CONF.onos()['app_list']`` are among them.

    NOTE(review): another function with this name and a different signature
    is defined further down in the visible source; if both live in the same
    module, the later definition replaces this one at import time.

    Args:
        node: address of the ONOS node.

    Returns:
        ``'ok'`` or ``'nok'``.  A failed command or an output line that does
        not have the expected shape yields ``'nok'``.
    """
    app_rt = SshCommand.onos_ssh_exec(node, 'apps -a -s')

    if app_rt is not None:
        try:
            app_active_list = [line.split(".")[2].split()[0]
                               for line in app_rt.splitlines()]
        except IndexError:
            # A line without the expected shape used to escape as an
            # IndexError; treat an unparsable listing as a failed check.
            LOG.exception()
            app_active_list = None

        if app_active_list is not None and \
                set(CONF.onos()['app_list']).issubset(app_active_list):
            return 'ok'

    LOG.error("'%s' Application Check Error", node)
    return 'nok'
def proc_shell_cmd(node, cmd):
    """Run a shell command on the node selected by ``node``.

    Args:
        node: node selector handed to ``get_node_list``; also used as the
            key of the result dict.
        cmd: shell command line to execute over SSH.

    Returns:
        ``{node: <output as str>}`` on success, a ``{'fail': <message>}``
        dict when no node matches or the command returned nothing, and
        ``None`` when anything raised (the exception is only logged).
    """
    try:
        rows = get_node_list(node, 'username, ip_addr', DB.NODE_INFO_TBL)

        if not len(rows):
            return {'fail': 'This is not a command on the target system.'}

        for login, address in rows:
            output = SshCommand.ssh_exec(login, address, cmd)
            if output is None:
                return {'fail': 'Invalid command.'}
            # Every path through the loop body returns, so only the first
            # row is ever used.
            return {node: str(output)}
    except:
        LOG.exception()
def proc_onos_cmd(node, cmd):
    """Run an ONOS CLI command on the node selected by ``node``.

    Args:
        node: node selector handed to ``get_node_list``; also used as the
            key of the result dict.
        cmd: ONOS CLI command to execute.

    Returns:
        ``{node: <output as str>}`` on success, a ``{'fail': <message>}``
        dict when no node matches, the node is not of type ``ONOS`` or the
        command returned nothing, and ``None`` when anything raised (the
        exception is only logged).
    """
    not_a_target = 'This is not a command on the target system.'
    try:
        rows = get_node_list(node, 'ip_addr, type', DB.NODE_INFO_TBL)

        if not len(rows):
            return {'fail': not_a_target}

        for address, node_type in rows:
            if node_type != 'ONOS':
                return {'fail': not_a_target}

            output = SshCommand.onos_ssh_exec(address, cmd)
            if output is None:
                return {'fail': 'Invalid command.'}
            # Every path through the loop body returns, so only the first
            # row is ever used.
            return {node: str(output)}
    except:
        LOG.exception()
def get_gw_ratio_compute(conn, db_log, node_ip, node_name, pre_stat):
    """Check how a compute node's group traffic is spread over its buckets.

    Steps:
      1. Join the ONOS table with the node-info table and search the stored
         node lists for the entry whose ``management_ip`` equals
         ``node_ip``; this yields the ONOS address to query and the
         node's hostname.
      2. Look up the node's ``of_id`` by hostname.
      3. Run ``groups`` on that ONOS node and collect every ``packets=``
         value on lines containing the ``of_id``.
      4. Compare the counters with the previous sample in
         ``pre_stat[node_name + '_GW']`` and compute each counter's share
         of the total increment.  A share below ``CONF.alarm()['gw_ratio']``
         makes the status ``'nok'``.
      5. Store the result in the ``gw_ratio`` column and save the current
         counters as the new previous sample.

    Args:
        conn: database connection.
        db_log: logger whose ``write_log`` records the SQL statements.
        node_ip: management IP of the compute node.
        node_name: name of the compute node (row key and ``pre_stat`` key).
        pre_stat: dict of previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason)`` where ``status`` is ``'ok'``,
        ``'nok'``, ``'-'`` (no previous sample yet) or ``'fail'``, and
        ``reason`` holds the ratio report whenever status is not ``'ok'``.
    """
    status = 'ok'
    reason = []
    # Bound up front: the failure path after the try appends it to
    # ``reason`` and used to hit a NameError when an exception happened
    # before the report was built.
    json_ratio = None

    try:
        stat_key = node_name + '_GW'

        sql = 'SELECT ' + DB.ONOS_TBL + '.nodename, nodelist, ip_addr' + ' FROM ' + DB.ONOS_TBL + \
              ' INNER JOIN ' + DB.NODE_INFO_TBL + ' ON ' + DB.ONOS_TBL + '.nodename = ' + \
              DB.NODE_INFO_TBL + '.nodename'
        nodes_info = conn.cursor().execute(sql).fetchall()

        if len(nodes_info) == 0:
            LOG.info('Fail to load onos list')
            return 'fail', pre_stat, reason

        manage_ip = ''
        hostname = ''
        for nodename, nodelist, ip in nodes_info:
            if not nodelist == 'none':
                # NOTE(review): eval() runs text read back from the database.
                # If that column only ever holds a literal list of dicts,
                # ast.literal_eval would be the safe replacement -- confirm
                # before changing.
                for node_info in eval(nodelist):
                    try:
                        if dict(node_info)['management_ip'] == node_ip:
                            manage_ip = ip
                            hostname = dict(node_info)['hostname']
                    except Exception:
                        manage_ip = ''

                    if not manage_ip == '':
                        break

            if not manage_ip == '':
                break

        if hostname == '':
            LOG.info('Can not find hostname')
            return 'fail', pre_stat, reason

        try:
            sql = 'SELECT of_id FROM ' + DB.OPENSTACK_TBL + " WHERE hostname = '" + str(hostname) + "'"
            LOG.info(sql)
            node_info = conn.cursor().execute(sql).fetchone()
            of_id = node_info[0]
        except Exception:
            LOG.exception()
            LOG.info('Can not find of_id')
            return 'fail', pre_stat, reason

        group_rt = SshCommand.onos_ssh_exec(manage_ip, 'groups')

        total_cnt = 0
        gw_list = []
        if group_rt is not None:
            for line in group_rt.splitlines():
                if of_id in line:
                    for col in line.split(','):
                        if 'packets=' in col:
                            packets = int(col.split('=')[1])
                            total_cnt = total_cnt + packets
                            gw_list.append(packets)

        str_ratio = ''
        if stat_key not in pre_stat:
            # First sample for this node: nothing to compare against yet.
            status = '-'
            json_ratio = {'ratio': '-', 'status': status,
                          'period': CONF.watchdog()['interval']}
        else:
            for i, gw in enumerate(gw_list):
                # Indexing (rather than zip) keeps a changed counter count
                # an error instead of silently truncating.
                cur_gw = gw - pre_stat[stat_key]['gw_list'][i]
                cur_total = total_cnt - pre_stat[stat_key]['gw_total']

                LOG.info('cur_gw = ' + str(cur_gw))
                LOG.info('cur_total = ' + str(cur_total))

                if cur_gw == 0 and cur_total == 0:
                    # No traffic at all counts as an even split.
                    ratio = 100 / len(gw_list)
                elif cur_gw <= 0 or cur_total <= 0:
                    ratio = 0
                else:
                    ratio = float(cur_gw) * 100 / cur_total

                str_ratio = str_ratio + str(ratio) + ':'

                if ratio < float(CONF.alarm()['gw_ratio']):
                    status = 'nok'

            json_ratio = {'ratio': str_ratio.rstrip(':'), 'status': status,
                          'period': CONF.watchdog()['interval']}

        LOG.info('[COMPUTE] ' + 'GW_RATIO = ' + str_ratio.rstrip(':'))

        try:
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET gw_ratio = "' + str(json_ratio) + '"' + \
                  " WHERE nodename = '" + node_name + "'"
            db_log.write_log('----- UPDATE TRAFFIC GW INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] TRAFFIC GW DB Update Fail.')
        except Exception:
            LOG.exception()

        in_out_dic = dict()
        in_out_dic['gw_list'] = gw_list
        in_out_dic['gw_total'] = total_cnt
        pre_stat[stat_key] = in_out_dic
    except Exception:
        LOG.exception()
        status = 'fail'

    if not status == 'ok':
        if json_ratio is None:
            # The check aborted before a report existed; give the caller a
            # minimal one without touching CONF again.
            json_ratio = {'ratio': '-', 'status': status}
        reason.append(json_ratio)

    return status, pre_stat, reason
def _onos_node_addr_fields(tmp):
    """Return ``(management_ip, data_ip, state)`` from a split ``openstack-nodes`` row.

    The three columns start at index 4 when column 3 begins with ``of:`` and
    at index 3 otherwise.
    """
    first = 4 if tmp[3].startswith('of:') else 3
    return tmp[first], tmp[first + 1], tmp[first + 2]


def onos_node_check(conn, db_log, node_name, node_ip):
    """Check the OpenStack nodes known to an ONOS instance and their ports.

    Runs ``openstack-nodes`` on the ONOS node.  For every address in
    ``CONF.openstack()['compute_list']`` and ``['gateway_list']`` (the part
    after the first ``:`` of each entry) the matching output row is looked
    up; a row without ``COMPLETE`` or a missing row marks the node as
    ``'nok'``.  For each matched host, ``openstack-node-check <hostname>``
    is run and every result line not starting with ``OK`` marks the port
    and the node as ``'nok'``.  Rows whose management IP is not monitored
    are added with ``monitor_item`` set to ``False``.

    Side effects: updates ``data_ip``/``hostname``/``of_id`` of the matching
    row in the OpenStack table and ``nodelist``/``port`` of this ONOS node's
    row in the ONOS table.

    Args:
        conn: database connection.
        db_log: logger whose ``write_log`` records the SQL statements.
        node_name: name of the ONOS node (row key of the final update).
        node_ip: address of the ONOS node.

    Returns:
        ``(node_status, fail_reason)`` with ``node_status`` one of ``'ok'``,
        ``'nok'`` or ``'fail'`` and ``fail_reason`` the list of node
        reports that caused a non-ok status.
    """
    # Bound before the try: both are returned even when the very first
    # call raises (this used to end in a NameError).
    node_status = 'ok'
    fail_reason = []

    try:
        node_rt = SshCommand.onos_ssh_exec(node_ip, 'openstack-nodes')

        node_list = []
        port_list = []
        ip_list = []

        if node_rt is not None:
            for ip in CONF.openstack()['compute_list'] + CONF.openstack()['gateway_list']:
                ip = str(ip).split(':')[1]

                find_flag = False
                for line in node_rt.splitlines():
                    if (not (line.startswith('Total') or line.startswith('Hostname'))) \
                            and ' ' + ip + ' ' in line:
                        find_flag = True
                        fail_flag = False

                        new_line = " ".join(line.split())
                        tmp = new_line.split(' ')
                        host_name = tmp[0]
                        node_type = tmp[1]
                        of_id = tmp[2]
                        # Parsed outside the DB try below so that a failed
                        # lookup cannot leave these unbound or carrying the
                        # values of a previous row.
                        manage_ip, data_ip, state = _onos_node_addr_fields(tmp)

                        if 'COMPLETE' not in line:
                            node_status = 'nok'
                            fail_flag = True

                        try:
                            sql = 'SELECT nodename FROM ' + DB.NODE_INFO_TBL + \
                                  " WHERE ip_addr = '" + ip + "'"
                            openstack_nodename = conn.cursor().execute(sql).fetchone()[0]

                            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                                  " SET data_ip = '" + data_ip + "'," + \
                                  " hostname = '" + host_name + "'," + \
                                  " of_id = '" + of_id + "'" + \
                                  " WHERE nodename = '" + openstack_nodename + "'"
                            db_log.write_log('----- UPDATE OPENSTACK INFO -----\n' + sql)

                            if DB.sql_execute(sql, conn) != 'SUCCESS':
                                db_log.write_log('[FAIL] OPENSTACK DATA IP Update Fail.')
                        except Exception:
                            LOG.exception()

                        port_rt = SshCommand.onos_ssh_exec(
                            node_ip, 'openstack-node-check ' + host_name)

                        host_port_list = []
                        port_status = 'ok'
                        if port_rt is not None:
                            for port_line in port_rt.splitlines():
                                if port_line.startswith('[') or port_line.strip() == '':
                                    continue

                                port_fields = port_line.split(' ')
                                if not port_line.startswith('OK'):
                                    rest_json = {
                                        'port_name': port_fields[1].split('=')[0],
                                        'status': 'nok'
                                    }
                                    fail_flag = True
                                    port_status = 'nok'
                                    node_status = 'nok'
                                else:
                                    rest_json = {
                                        'port_name': port_fields[1].split('=')[0],
                                        'status': 'ok'
                                    }

                                host_port_list.append(rest_json)
                        else:
                            node_status = 'nok'
                            port_status = 'nok'

                        port_json = {
                            'hostname': host_name,
                            'port_list': host_port_list
                        }
                        port_list.append(port_json)

                        rest_json = {
                            'hostname': host_name,
                            'type': node_type,
                            'of_id': of_id,
                            'management_ip': manage_ip,
                            'data_ip': data_ip,
                            'state': state,
                            'port_status': port_status,
                            'monitor_item': True
                        }
                        node_list.append(rest_json)
                        ip_list.append(manage_ip)

                        if fail_flag:
                            fail_reason.append(rest_json)

                if not find_flag:
                    # Configured node that ONOS does not list at all.
                    rest_json = {
                        'hostname': '-',
                        'type': '-',
                        'of_id': '-',
                        'port_status': 'nok',
                        'management_ip': ip,
                        'data_ip': '-',
                        'state': 'NO_EXIST',
                        'monitor_item': True
                    }
                    node_list.append(rest_json)
                    node_status = 'nok'
                    fail_reason.append(rest_json)

            # Rows ONOS reports that are not part of the monitored set.
            for line in node_rt.splitlines():
                if not (line.startswith('Total') or line.startswith('Hostname')):
                    new_line = " ".join(line.split())
                    tmp = new_line.split(' ')
                    manage_ip, data_ip, state = _onos_node_addr_fields(tmp)

                    if manage_ip not in ip_list:
                        rest_json = {
                            'hostname': tmp[0],
                            'type': tmp[1],
                            'of_id': tmp[2],
                            'management_ip': manage_ip,
                            'data_ip': data_ip,
                            'state': state,
                            'monitor_item': False
                        }
                        node_list.append(rest_json)
        else:
            LOG.error("'%s' ONOS Node Check Error", node_ip)
            node_status = 'fail'
            node_list = 'fail'

        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET nodelist = "' + str(node_list) + '",' + \
                  ' port = "' + str(port_list) + '"' + \
                  " WHERE nodename = '" + node_name + "'"
            db_log.write_log('----- UPDATE ONOS NODE INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS NODE Update Fail.')
        except Exception:
            LOG.exception()
    except Exception:
        LOG.exception()
        node_status = 'fail'

    return node_status, fail_reason
def controller_traffic_check(conn, db_log, node_name, node_ip, pre_stat):
    """Check the control-message traffic ratio of an ONOS controller.

    Reads the first ``key=value`` pair of the ONOS ``summary`` output as the
    controller address, then runs
    ``cpman-stats-list <address> control_message <of_id>`` for every
    ``(hostname, of_id)`` row of the OpenStack table and sums the inbound
    and outbound packet counters.  The increments since the previous sample
    in ``pre_stat[node_name]`` give ``outbound * 100 / inbound``; a value
    below ``CONF.alarm()['controller_traffic_ratio']`` is ``'nok'``.  When
    the inbound increment does not exceed
    ``CONF.alarm()['controller_traffic_minimum_inbound']`` the status is
    ``'-'`` (not evaluated).

    Side effect: stores the report in the ``traffic_stat`` column of this
    node's row in the ONOS table.

    Args:
        conn: database connection.
        db_log: logger whose ``write_log`` records the SQL statements.
        node_name: name of the ONOS node (row key and ``pre_stat`` key).
        node_ip: address of the ONOS node.
        pre_stat: dict of previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason)`` with ``status`` one of ``'ok'``,
        ``'nok'``, ``'-'`` or ``'fail'``.
    """
    # Everything the report and the return statement need is bound before
    # the try, so an early exception can no longer end in a NameError.
    controller_traffic = 'ok'
    reason = []
    in_packet = 0
    out_packet = 0
    cpman_stat_list = list()
    desc = ''
    ratio = 0

    # cpman message type -> key used in the per-host report
    stat_keys = {
        'INBOUND_PACKET': 'inbound',
        'OUTBOUND_PACKET': 'outbound',
        'FLOW_MOD_PACKET': 'mod',
        'FLOW_REMOVED_PACKET': 'removed',
        'REQUEST_PACKET': 'request',
        'REPLY_PACKET': 'reply',
    }

    try:
        summary_rt = SshCommand.onos_ssh_exec(node_ip, 'summary')

        if summary_rt is not None:
            data_ip = str(summary_rt).split(',')[0].split('=')[1]

            try:
                sql = 'SELECT hostname, of_id FROM ' + DB.OPENSTACK_TBL
                nodes_info = conn.cursor().execute(sql).fetchall()

                for hostname, of_id in nodes_info:
                    cmd = 'cpman-stats-list ' + data_ip + ' control_message ' + of_id
                    stat_rt = SshCommand.onos_ssh_exec(node_ip, cmd)

                    # Fresh placeholders per host: counters that the output
                    # does not mention stay '-' instead of inheriting the
                    # previous host's values.
                    rest_json = {'hostname': str(hostname), 'of_id': str(of_id)}
                    for key in stat_keys.values():
                        rest_json[key] = '-'

                    if stat_rt is not None and not str(stat_rt).startswith('Failed'):
                        for line in stat_rt.splitlines():
                            fields = line.split(',')
                            msg_type = fields[0].split('=')[1]
                            avg_cnt = int(fields[2].split('=')[1])

                            if msg_type == 'INBOUND_PACKET':
                                in_packet = in_packet + avg_cnt
                            elif msg_type == 'OUTBOUND_PACKET':
                                out_packet = out_packet + avg_cnt

                            if msg_type in stat_keys:
                                rest_json[stat_keys[msg_type]] = avg_cnt
                    else:
                        reason.append(rest_json)
                        controller_traffic = 'fail'

                    cpman_stat_list.append(rest_json)

                for_save_in = in_packet
                for_save_out = out_packet

                if node_name not in pre_stat:
                    # First sample for this node: nothing to compare yet.
                    controller_traffic = '-'

                    in_out_dic = dict()
                    in_out_dic['in_packet'] = for_save_in
                    in_out_dic['out_packet'] = for_save_out
                    pre_stat[node_name] = in_out_dic
                else:
                    in_packet = in_packet - int(pre_stat[node_name]['in_packet'])
                    out_packet = out_packet - int(pre_stat[node_name]['out_packet'])

                    if in_packet <= CONF.alarm()['controller_traffic_minimum_inbound']:
                        desc = 'Minimum increment for status check = ' + str(
                            CONF.alarm()['controller_traffic_minimum_inbound'])
                        controller_traffic = '-'
                    else:
                        if in_packet == 0 and out_packet == 0:
                            ratio = 100
                        elif in_packet <= 0 or out_packet < 0:
                            LOG.info('Controller Traffic Ratio Fail.')
                            ratio = 0
                        else:
                            ratio = float(out_packet) * 100 / in_packet

                        LOG.info('[CPMAN][' + node_name + '] Controller Traffic Ratio = ' +
                                 str(ratio) + '(' + str(out_packet) + '/' + str(in_packet) + ')')
                        desc = 'Controller Traffic Ratio = ' + str(ratio) + \
                               '(' + str(out_packet) + '/' + str(in_packet) + ')\n'

                        if ratio < float(CONF.alarm()['controller_traffic_ratio']):
                            controller_traffic = 'nok'

                        # NOTE(review): the baseline is refreshed only after
                        # an evaluated sample, so increments below the
                        # minimum accumulate.  The nesting of this save was
                        # reconstructed from whitespace-collapsed source --
                        # confirm against the original file.
                        in_out_dic = dict()
                        in_out_dic['in_packet'] = for_save_in
                        in_out_dic['out_packet'] = for_save_out
                        pre_stat[node_name] = in_out_dic
            except Exception:
                LOG.exception()
                controller_traffic = 'fail'
        else:
            controller_traffic = 'fail'

        controller_json = {
            'status': controller_traffic,
            'stat_list': cpman_stat_list,
            'minimum_inbound_packet': CONF.alarm()['controller_traffic_minimum_inbound'],
            'current_inbound_packet': in_packet,
            'current_outbound_packet': out_packet,
            'period': CONF.watchdog()['interval'],
            'ratio': format(ratio, '.2f'),
            'description': desc,
            'threshold': CONF.alarm()['controller_traffic_ratio']
        }

        if not controller_traffic == 'ok':
            reason.append(controller_json)

        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET traffic_stat = "' + str(controller_json) + '"' + \
                  " WHERE nodename = '" + node_name + "'"
            db_log.write_log('----- UPDATE CONTROLLER TRAFFIC INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] CONTROLLER TRAFFIC Update Fail.')
        except Exception:
            LOG.exception()
    except Exception:
        LOG.exception()
        controller_traffic = 'fail'

    return controller_traffic, pre_stat, reason
def swarm_check(conn, db_log, node_name, user_name, node_ip):
    """Check docker swarm nodes and services on a node and store the raw listings.

    Runs ``sudo docker node ls``, ``sudo docker service ls`` and, for every
    name in ``CONF.swarm()['app_list']``, ``sudo docker service ps <app>``
    over SSH.  The text of the three listings is written to the ``node``,
    ``service`` and ``ps`` columns of this node's row in the swarm table.

    Args:
        conn: database connection.
        db_log: logger whose ``write_log`` records the SQL statement.
        node_name: row key of the swarm table.
        user_name: SSH login used on the node.
        node_ip: address of the node.

    Returns:
        ``(ret_app, ret_node)``, each ``'ok'`` or ``'nok'``.
    """
    str_node = ''
    str_service = ''
    str_ps = ''
    ret_app = 'ok'
    ret_node = 'ok'

    node_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker node ls')

    if node_rt is not None:
        try:
            leader_flag = False
            for line in node_rt.splitlines():
                line = line.decode('utf-8')
                str_node = str_node + line + '\n'

                # header row
                if line.startswith('ID'):
                    continue

                if 'Leader' in line:
                    leader_flag = True

                    # the leader must be both Ready and Active
                    if not ('Ready' in line and 'Active' in line):
                        ret_node = 'nok'
                        break

                if 'Down' in line:
                    ret_node = 'nok'
                    break

            # NOTE: the loop stops at the first bad row, so str_node only
            # holds the listing up to that row in that case.
            if not leader_flag:
                ret_node = 'nok'
        except:
            LOG.exception()
            ret_node = 'nok'
    else:
        LOG.error("\'%s\' Swarm Node Check Error", node_ip)
        str_node = 'fail'

    service_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker service ls')

    if service_rt is not None:
        try:
            for app in CONF.swarm()['app_list']:
                find_flag = False
                for line in service_rt.splitlines():
                    line = line.decode('utf-8')

                    # header row
                    if line.startswith('ID'):
                        continue

                    # exactly five whitespace-separated columns are expected;
                    # any other count raises ValueError and ends in 'nok'
                    id, name, mode, rep, img = line.split()

                    if app == name:
                        find_flag = True
                        # replicas are reported as "<running>/<desired>"
                        rep_tmp = rep.split('/')

                        if not (rep_tmp[0] == rep_tmp[1]):
                            ret_app = 'nok'
                            break

                if not find_flag:
                    ret_app = 'nok'
                    break
        except:
            LOG.exception()
            ret_app = 'nok'

        for line in service_rt.splitlines():
            line = line.decode('utf-8')
            str_service = str_service + line + '\n'
    else:
        LOG.error("\'%s\' Swarm Service Check Error", node_ip)
        str_service = 'fail'
        ret_app = 'nok'

    try:
        for app in CONF.swarm()['app_list']:
            ps_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker service ps ' + app)

            str_ps = str_ps + ' * ' + app + '\n\n'

            if ps_rt is not None:
                for line in ps_rt.splitlines():
                    line = line.decode('utf-8')
                    str_ps = str_ps + line + '\n'
            else:
                LOG.error("\'%s\' Swarm PS Check Error", node_ip)
                str_ps = str_ps + 'Command failure(' + app + ')\n'

            str_ps = str_ps + '\n'
    except:
        LOG.exception()

    try:
        # NOTE(review): the listings are embedded between single quotes
        # without escaping; a quote in the docker output would break the
        # statement.
        sql = 'UPDATE ' + DB.SWARM_TBL + \
              ' SET node = \'' + str_node + '\',' + \
              ' service = \'' + str_service + '\',' + \
              ' ps = \'' + str_ps + '\'' + \
              ' WHERE nodename = \'' + node_name + '\''
        db_log.write_log('----- UPDATE SWARM INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] SWARN DB Update Fail.')
    except:
        LOG.exception()

    return ret_app, ret_node
def swarm_service_check(conn, db_log, node_name, username, node_ip, swarm_manager):
    """Check the docker swarm services through a swarm manager.

    Logs into ``node_ip`` and from there runs
    ``ssh root@<swarm_manager> "sudo docker service ls"``.  Every name
    returned by ``get_service_list()`` must be listed with equal
    ``<running>/<desired>`` replicas; listed services outside that set are
    recorded with ``monitor_item`` set to ``False``.  ``docker service ps``
    is then collected for every monitored service.  Both lists are written
    to the ``service`` and ``ps`` columns of this node's row in the swarm
    table.

    Args:
        conn: database connection.
        db_log: logger whose ``write_log`` records the SQL statement.
        node_name: row key of the swarm table.
        username: SSH login used on ``node_ip``.
        node_ip: address of the node the command is issued from.
        swarm_manager: address of the swarm manager reached as ``root``.

    Returns:
        ``(service_status, fail_reason)`` with ``service_status`` one of
        ``'ok'``, ``'nok'`` or ``'fail'``.
    """
    service_status = 'ok'
    service_list = []
    ps_list = []
    fail_reason = []

    try:
        cmd = 'ssh root@' + swarm_manager + ' \"sudo docker service ls\"'
        service_rt = SshCommand.ssh_exec(username, node_ip, cmd)

        instance_list = get_service_list()

        if service_rt is not None:
            try:
                # pass 1: every monitored service must be present and complete
                for svc in instance_list:
                    find_flag = False
                    for line in service_rt.splitlines():
                        line = line.decode('utf-8')

                        # header row
                        if line.startswith('ID'):
                            continue

                        # exactly five whitespace-separated columns are
                        # expected; any other count raises ValueError
                        id, name, mode, rep, img = line.split()

                        if svc == name:
                            find_flag = True
                            # replicas are reported as "<running>/<desired>"
                            rep_tmp = rep.split('/')

                            if not (rep_tmp[0] == rep_tmp[1]):
                                service_status = 'nok'
                                svc_json = {
                                    'name': name,
                                    'mode': mode,
                                    'replicas': rep,
                                    'image': img,
                                    'status': 'nok',
                                    'monitor_item': True
                                }
                                fail_reason.append(svc_json)
                            else:
                                svc_json = {
                                    'name': name,
                                    'mode': mode,
                                    'replicas': rep,
                                    'image': img,
                                    'status': 'ok',
                                    'monitor_item': True
                                }

                            service_list.append(svc_json)

                    if not find_flag:
                        service_status = 'nok'
                        fail_reason.append('swarm ' + svc + ' service does not exist.')
                        # the remaining monitored services are not examined
                        break

                # pass 2: services that are listed but not monitored
                for line in service_rt.splitlines():
                    line = line.decode('utf-8')

                    if line.startswith('ID'):
                        continue

                    id, name, mode, rep, img = line.split()

                    if name in instance_list:
                        continue

                    rep_tmp = rep.split('/')

                    if not (rep_tmp[0] == rep_tmp[1]):
                        svc_json = {
                            'name': name,
                            'mode': mode,
                            'replicas': rep,
                            'image': img,
                            'status': 'nok',
                            'monitor_item': False
                        }
                    else:
                        svc_json = {
                            'name': name,
                            'mode': mode,
                            'replicas': rep,
                            'image': img,
                            'status': 'ok',
                            'monitor_item': False
                        }

                    service_list.append(svc_json)
            except:
                LOG.exception()
                service_status = 'fail'
        else:
            LOG.error("\'%s\' Swarm Service Check Error", node_ip)
            service_status = 'fail'

        for app in instance_list:
            cmd = 'ssh root@' + swarm_manager + ' \"sudo docker service ps ' + app + '\"'
            ps_rt = SshCommand.ssh_exec(username, node_ip, cmd)

            if ps_rt is not None:
                for line in ps_rt.splitlines():
                    line = line.decode('utf-8')

                    if line.startswith('ID'):
                        continue

                    # drop the marker docker prints in front of older tasks,
                    # then collapse runs of whitespace to single spaces
                    line = line.replace(' \_ ', '')
                    line = " ".join(line.split())
                    tmp = line.split(' ')

                    ps_json = {
                        'name': tmp[1],
                        'image': tmp[2],
                        'node': tmp[3],
                        'desired_state': tmp[4],
                        'current_state': tmp[5]
                    }
                    ps_list.append(ps_json)
            else:
                LOG.error("\'%s\' Swarm PS Check Error", node_ip)

        try:
            sql = 'UPDATE ' + DB.SWARM_TBL + \
                  ' SET service = \"' + str(service_list) + '\",' + \
                  ' ps = \"' + str(ps_list) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE SWARM SERVICE/PS INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] SWARM SERVICE/PS DB Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        service_status = 'fail'

    return service_status, fail_reason
def swarm_node_check(conn, db_log, node_name, username, node_ip, swarm_manager):
    """Check the docker swarm nodes through a swarm manager.

    Logs into ``node_ip`` and from there runs
    ``ssh root@<swarm_manager> "sudo docker node ls"``.  A leader row must
    exist and be both ``Ready`` and ``Active``, and no row may contain
    ``Down``.  The parsed rows are written to the ``node`` column of this
    node's row in the swarm table.

    Args:
        conn: database connection.
        db_log: logger whose ``write_log`` records the SQL statement.
        node_name: row key of the swarm table.
        username: SSH login used on ``node_ip``.
        node_ip: address of the node the command is issued from.
        swarm_manager: address of the swarm manager reached as ``root``.

    Returns:
        ``(node_status, fail_reason)`` with ``node_status`` one of ``'ok'``,
        ``'nok'`` or ``'fail'`` and ``fail_reason`` a list of messages.
    """
    node_status = 'ok'
    node_list = []
    fail_reason = []

    try:
        remote_cmd = 'ssh root@' + swarm_manager + ' "sudo docker node ls"'
        node_rt = SshCommand.ssh_exec(username, node_ip, remote_cmd)

        if node_rt is None:
            LOG.error("'%s' Swarm Node Check Error", node_ip)
            node_status = 'fail'
        else:
            try:
                leader_seen = False

                for raw in node_rt.splitlines():
                    # Drop the '*' marker and normalise column spacing.
                    row = " ".join(raw.decode('utf-8').replace('*', '').split())
                    cols = row.split(' ')

                    if row.startswith('ID'):
                        continue

                    is_leader = 'Leader' in row
                    entry = {
                        'hostname': cols[1],
                        'status': cols[2],
                        'availability': cols[3],
                        'manager': cols[4] if is_leader else ''
                    }

                    if is_leader:
                        leader_seen = True
                        if 'Ready' not in row or 'Active' not in row:
                            node_status = 'nok'
                            fail_reason.append(cols[1] + ' node is not ready.')

                    if 'Down' in row:
                        node_status = 'nok'
                        fail_reason.append(cols[1] + ' node is down.')

                    node_list.append(entry)

                if not leader_seen:
                    node_status = 'nok'
                    fail_reason.append('swarm leader node does not exist.')
            except:
                LOG.exception()
                node_status = 'nok'

        try:
            sql = 'UPDATE ' + DB.SWARM_TBL + \
                  ' SET node = "' + str(node_list) + '"' + \
                  " WHERE nodename = '" + node_name + "'"
            db_log.write_log('----- UPDATE SWARM NODE INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] SWARM NODE DB Update Fail.')
        except:
            LOG.exception()
    except:
        LOG.exception()
        node_status = 'fail'

    return node_status, fail_reason
def vrouter_check(conn, db_log, node_name, user_name, node_ip):
    """Check the vrouter containers, ONOS apps and routes of a gateway node.

    Runs ``sudo docker ps`` over SSH and checks that every name in
    ``CONF.openstack()['docker_list']`` appears on a row containing ``Up``.
    If a container whose second column contains ``onos`` is found, its
    ``IPAddress`` is read from ``docker inspect`` and the ONOS CLI inside it
    is queried for the active applications (compared with
    ``CONF.openstack()['onos_vrouter_app_list']``) and the routing table.
    The three lists are written to the ``docker``, ``onosApp`` and
    ``routingTable`` columns of this node's row in the OpenStack table.

    Args:
        conn: database connection.
        db_log: logger whose ``write_log`` records the SQL statement.
        node_name: row key of the OpenStack table.
        user_name: SSH login used on the node.
        node_ip: address of the gateway node.

    Returns:
        ``(ret_docker, fail_list)`` with ``ret_docker`` one of ``'ok'``,
        ``'nok'`` or ``'fail'`` and ``fail_list`` the failing container and
        application reports.
    """
    ret_docker = 'ok'
    docker_list = []
    fail_list = []
    onos_id = ''

    docker_rt = SshCommand.ssh_exec(user_name, node_ip, 'sudo docker ps')

    if docker_rt is not None:
        try:
            for docker in CONF.openstack()['docker_list']:
                for line in docker_rt.splitlines():
                    # header row
                    if line.startswith('CONTAINER'):
                        continue

                    tmp_line = line.split()

                    if ' ' + docker in line:
                        if not 'Up' in line:
                            docker_json = {'name': docker, 'status': 'nok', 'type': 'docker'}
                            fail_list.append(docker_json)
                            ret_docker = 'nok'
                        else:
                            docker_json = {'name': docker, 'status': 'ok', 'type': 'docker'}

                        docker_list.append(docker_json)

                        # remember the first column of the row whose second
                        # column contains 'onos'; used below with
                        # `docker inspect`
                        if 'onos' in tmp_line[1]:
                            onos_id = tmp_line[0]
        except:
            LOG.exception()
    else:
        LOG.error("\'%s\' Vrouter Node Check Error", node_ip)
        ret_docker = 'fail'

    onos_app_list = []
    route_list = []

    if not onos_id == '':
        try:
            # get onos container ip
            onos_rt = SshCommand.ssh_exec(user_name, node_ip,
                                          'sudo docker inspect ' + onos_id + ' | grep IPAddress')

            if onos_rt is not None:
                for line in onos_rt.splitlines():
                    line = line.strip()
                    if line.startswith('\"IPAddress'):
                        tmp = line.split(':')
                        onos_ip = tmp[1].strip().replace('\"', '').replace(',', '')
                        break

            # NOTE: onos_ip is unbound here when no IPAddress line was
            # found; the resulting NameError is caught and logged by the
            # except below and ret_docker is left unchanged.
            app_list = SshCommand.ssh_pexpect(user_name, node_ip, onos_ip, 'apps -a -s')

            app_active_list = list()
            for line in app_list.splitlines():
                if line.startswith('fail'):
                    continue

                # first word of the third dot-separated component
                app_active_list.append(line.split(".")[2].split()[0])

            for app in CONF.openstack()['onos_vrouter_app_list']:
                if app in app_active_list:
                    app_json = {'name': app, 'status': 'ok', 'type': 'onos_app'}
                else:
                    app_json = {'name': app, 'status': 'nok', 'type': 'onos_app'}
                    fail_list.append(app_json)
                    ret_docker = 'nok'

                onos_app_list.append(app_json)

            str_route = SshCommand.ssh_pexpect(user_name, node_ip, onos_ip, 'routes')

            for line in str_route.splitlines():
                line = line.strip()

                # skip the table title, column header and summary rows
                if (line.startswith('Table') or line.startswith('Network') or line.startswith('Total')):
                    continue

                new_line = " ".join(line.split())

                if new_line.startswith('fail'):
                    continue

                tmp = new_line.split(' ')
                route_json = {'network': tmp[0], 'next_hop': tmp[1]}
                route_list.append(route_json)
        except:
            LOG.exception()
    else:
        LOG.info('can not find onos_id.')
        ret_docker = 'fail'

    try:
        sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
              ' SET docker = \"' + str(docker_list) + '\",' + \
              ' onosApp = \"' + str(onos_app_list) + '\",' + \
              ' routingTable = \"' + str(route_list) + '\"' + \
              ' WHERE nodename = \'' + node_name + '\''
        db_log.write_log('----- UPDATE GATEWAY INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] GATEWAY DB Update Fail.')
    except:
        LOG.exception()

    return ret_docker, fail_list
def get_internal_traffic(conn, db_log, node_name, node_ip, user_name, sub_type, rx_count, patch_tx,
                         pre_stat):
    """Check how much of a node's incoming traffic leaves ``br-int`` again.

    For ``sub_type == 'COMPUTE'`` the flow table of ``br-int`` is dumped
    over SSH: the counter in the fourth comma-separated field of every flow
    line is added to the ``in_port`` total, the ``output`` total or the
    ``actions=group`` total (first match wins).  Incoming traffic is the
    ``in_port`` total plus ``rx_count``; outgoing traffic is the group plus
    output totals.  For any other ``sub_type`` incoming is ``rx_count`` and
    outgoing is ``patch_tx``.

    The increments since the previous sample in
    ``pre_stat[node_name + '_internal']`` give ``out * 100 / in``; a value
    below ``CONF.alarm()['internal_traffic_ratio']`` is ``'nok'``.

    Side effect: stores the report in the ``internal_traffic`` column of
    this node's row in the OpenStack table.

    Args:
        conn: database connection.
        db_log: logger whose ``write_log`` records the SQL statement.
        node_name: name of the node (row key and ``pre_stat`` key prefix).
        node_ip: address of the node.
        user_name: SSH login used on the node.
        sub_type: ``'COMPUTE'`` selects the flow-table based calculation.
        rx_count: vxlan rx packet counter of the node.
        patch_tx: patch port tx packet counter, ``-1`` if unavailable.
        pre_stat: dict of previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason_list)`` with ``status`` one of ``'ok'``,
        ``'nok'``, ``'-'`` (no previous sample) or ``'fail'``.
    """
    status = 'ok'
    reason_list = []
    in_packet = 0
    out_packet = 0
    desc = ''
    # Bound up front: it is appended to reason_list after the try and used
    # to be unbound when collection failed, which raised a NameError.
    vxlan_json = None

    try:
        stat_key = node_name + '_internal'

        if sub_type == 'COMPUTE':
            flow_rt = SshCommand.ssh_exec(user_name, node_ip,
                                          'sudo ovs-ofctl -O OpenFlow13 dump-flows br-int')

            inport_cnt = 0
            gw_cnt = 0
            output_cnt = 0

            if flow_rt is not None:
                for line in flow_rt.splitlines():
                    tmp = line.split(',')
                    if 'in_port' in line:
                        inport_cnt = inport_cnt + int(tmp[3].split('=')[1])
                    elif 'output' in line:
                        output_cnt = output_cnt + int(tmp[3].split('=')[1])
                    elif 'actions=group' in line:
                        gw_cnt = gw_cnt + int(tmp[3].split('=')[1])

                in_packet = inport_cnt + rx_count
                out_packet = gw_cnt + output_cnt

                port_json = {'vm_tx': inport_cnt, 'vxlan_rx': rx_count,
                             'out_gw': gw_cnt, 'output': output_cnt}
            else:
                port_json = {'vm_tx': -1, 'vxlan_rx': -1, 'out_gw': -1, 'output': -1}
                status = 'fail'
        else:
            port_json = {'vxlan_rx': rx_count, 'patch-integ': patch_tx}

            if patch_tx == -1:
                status = 'fail'
            else:
                in_packet = rx_count
                out_packet = patch_tx

        for_save_in = in_packet
        for_save_out = out_packet

        if stat_key not in pre_stat:
            # First sample for this node: nothing to compare against yet.
            status = '-'
            vxlan_json = {'port_stat_in_out': port_json, 'period': CONF.watchdog()['interval'],
                          'ratio': 0, 'current_rx': -1, 'current_tx': -1, 'description': desc,
                          'threshold': CONF.alarm()['internal_traffic_ratio'], 'status': status}
        elif status == 'ok':
            in_packet = in_packet - int(pre_stat[stat_key]['in_packet'])
            out_packet = out_packet - int(pre_stat[stat_key]['out_packet'])

            if in_packet == 0 and out_packet == 0:
                ratio = 100
            elif in_packet <= 0 or out_packet < 0:
                LOG.info('Internal Traffic Ratio Fail.')
                ratio = 0
            else:
                ratio = float(out_packet) * 100 / in_packet

            LOG.info('Internal Traffic Ratio = ' + str(ratio))
            desc = 'Internal Traffic Ratio = ' + str(ratio) + \
                   '(' + str(out_packet) + '/' + str(in_packet) + ')'

            if ratio < float(CONF.alarm()['internal_traffic_ratio']):
                status = 'nok'

            vxlan_json = {'port_stat_in_out': port_json, 'period': CONF.watchdog()['interval'],
                          'ratio': format(ratio, '.2f'), 'current_rx': in_packet,
                          'current_tx': out_packet, 'description': desc,
                          'threshold': CONF.alarm()['internal_traffic_ratio'], 'status': status}
        else:
            # Counters could not be collected for a node that already has a
            # previous sample.  This case used to leave the report unbound.
            vxlan_json = {'port_stat_in_out': port_json, 'period': CONF.watchdog()['interval'],
                          'ratio': 0, 'current_rx': -1, 'current_tx': -1, 'description': desc,
                          'threshold': CONF.alarm()['internal_traffic_ratio'], 'status': status}

        # NOTE(review): as before, the baseline is stored even when the
        # counters could not be collected (both values are then 0), which
        # inflates the next increment -- confirm whether that is intended.
        in_out_dic = dict()
        in_out_dic['in_packet'] = for_save_in
        in_out_dic['out_packet'] = for_save_out
        pre_stat[stat_key] = in_out_dic

        try:
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET internal_traffic = "' + str(vxlan_json) + '"' + \
                  " WHERE nodename = '" + node_name + "'"
            db_log.write_log('----- UPDATE INTERNAL TRAFFIC INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] INTERNAL TRAFFIC DB Update Fail.')
        except Exception:
            LOG.exception()
    except Exception:
        LOG.exception()
        status = 'fail'

    if not status == 'ok':
        if vxlan_json is None:
            # The check aborted before a report existed; hand the caller a
            # minimal one without touching CONF again.
            vxlan_json = {'status': status, 'description': desc}
        reason_list.append(vxlan_json)

    return status, pre_stat, reason_list
def get_node_traffic(conn, db_log, node_name, rx_dic, tx_dic, total_rx, total_tx, err_info,
                     pre_stat):
    """Check the vxlan traffic of a node against the totals of all nodes.

    The increments of ``total_rx`` and ``total_tx`` since the previous
    sample in ``pre_stat[node_name + '_VXLAN']`` give
    ``total_rx * 100 / total_tx``; a value below
    ``CONF.alarm()['node_traffic_ratio']`` is ``'nok'``.  The status is also
    ``'nok'`` when the rx increment is smaller than the increment of the
    expected minimum, or when any of the drop/error counters in
    ``err_info`` grew.

    The expected minimum is the sum of the third comma-separated field of
    every ``tunnelDst`` line returned by
    ``flows --filter '{tunnelDst=<data_ip>}' --short`` on the first ONOS
    node that answers.

    Side effect: stores the report in the ``vxlan_traffic`` column of this
    node's row in the OpenStack table.

    Args:
        conn: database connection.
        db_log: logger whose ``write_log`` records the SQL statement.
        node_name: name of the node (row key and ``pre_stat`` key prefix).
        rx_dic: per-node rx counters; ``rx_dic[node_name]`` is reported.
        tx_dic: per-node tx counters; ``tx_dic[node_name]`` is reported.
        total_rx: rx packet total used for the ratio.
        total_tx: tx packet total used for the ratio.
        err_info: dict with ``rx_drop``, ``rx_err``, ``tx_drop``, ``tx_err``.
        pre_stat: dict of previous samples; updated in place.

    Returns:
        ``(status, pre_stat, reason_list)`` with ``status`` one of ``'ok'``,
        ``'nok'``, ``'-'`` (no previous sample) or ``'fail'``.
    """
    status = 'ok'
    reason_list = []
    # Defaults for everything the report after the try needs; these used to
    # be unbound when the check aborted early.
    port_json = {}
    ratio = -1
    description = ''

    try:
        stat_key = node_name + '_VXLAN'
        pre_total_rx = total_rx
        pre_total_tx = total_tx

        # check minimum packet count
        sql = 'SELECT data_ip FROM ' + DB.OPENSTACK_TBL + " WHERE nodename = '" + node_name + "'"
        data_ip = conn.cursor().execute(sql).fetchone()[0]

        sql = 'SELECT ip_addr FROM ' + DB.NODE_INFO_TBL + " WHERE type = 'ONOS'"
        nodes_info = conn.cursor().execute(sql).fetchall()

        min_rx = 0
        if len(nodes_info) == 0:
            LOG.info('Fail to load onos list')
            status = 'fail'
        else:
            for ip in nodes_info:
                flows_rt = SshCommand.onos_ssh_exec(
                    ip[0], '\"flows --filter \'{tunnelDst=' + data_ip + '}\' --short\"')

                if flows_rt is not None:
                    for line in flows_rt.splitlines():
                        if 'tunnelDst' in line:
                            min_rx = min_rx + int(line.split(',')[2].split('=')[1])
                    # One answering ONOS node is enough.
                    break

        if stat_key not in pre_stat:
            # First sample for this node: nothing to compare against yet.
            status = '-'
            ratio = -1
        else:
            prev = pre_stat[stat_key]
            total_rx = total_rx - int(prev['total_rx'])
            total_tx = total_tx - int(prev['total_tx'])
            cur_min = min_rx - int(prev['min_rx'])

            if total_rx == 0 and total_tx == 0:
                ratio = 100
            elif total_tx <= 0 or total_rx < 0:
                # The second operand used to repeat ``total_tx``; it now
                # matches the sibling ratio checks (denominator <= 0 or
                # numerator < 0).
                LOG.info('Node Traffic Ratio Fail.')
                ratio = 0
            else:
                ratio = float(total_rx) * 100 / total_tx

        LOG.info('Node Traffic Ratio = ' + str(ratio))

        port_json = {'rx': rx_dic[node_name], 'minimum_rx': min_rx,
                     'rx_drop': err_info['rx_drop'], 'rx_errs': err_info['rx_err'],
                     'tx': tx_dic[node_name],
                     'tx_drop': err_info['tx_drop'], 'tx_errs': err_info['tx_err']}

        if not status == '-':
            description = 'Ratio of success for all nodes = ' + str(ratio) + \
                          ' (' + str(total_rx) + ' / ' + str(total_tx) + ')'

            if ratio < float(CONF.alarm()['node_traffic_ratio']):
                LOG.info('[NODE TRAFFIC] ratio nok')
                status = 'nok'

            if total_rx < cur_min:
                LOG.info('CUR_MIN_RX = ' + str(cur_min) + ', CUR_RX = ' + str(total_rx) +
                         ', Less than rx minimum.')
                status = 'nok'

            # Any growth of a drop/error counter since the last sample is
            # treated as a failure.
            for counter in ('rx_drop', 'rx_err', 'tx_drop', 'tx_err'):
                if err_info[counter] - int(pre_stat[stat_key][counter]) > 0:
                    LOG.info('[NODE TRAFFIC] ' + counter + ' nok')
                    status = 'nok'

        in_out_dic = dict()
        in_out_dic['total_rx'] = pre_total_rx
        in_out_dic['total_tx'] = pre_total_tx
        in_out_dic['min_rx'] = min_rx
        in_out_dic['rx_drop'] = err_info['rx_drop']
        in_out_dic['rx_err'] = err_info['rx_err']
        in_out_dic['tx_drop'] = err_info['tx_drop']
        in_out_dic['tx_err'] = err_info['tx_err']
        pre_stat[stat_key] = in_out_dic
    except Exception:
        LOG.exception()
        status = 'fail'

    vxlan_json = {'port_stat_vxlan': port_json, 'period': CONF.watchdog()['interval'],
                  'ratio': format(ratio, '.2f'), 'current_rx': total_rx, 'current_tx': total_tx,
                  'description': description, 'threshold': CONF.alarm()['node_traffic_ratio'],
                  'status': status}

    try:
        sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
              ' SET vxlan_traffic = "' + str(vxlan_json) + '"' + \
              " WHERE nodename = '" + node_name + "'"
        db_log.write_log('----- UPDATE VXLAN STAT INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] VXLAN STAT DB Update Fail.')
    except Exception:
        LOG.exception()

    if not status == 'ok':
        reason_list.append(vxlan_json)

    return status, pre_stat, reason_list
def onos_app_check(conn, db_log, node_name, node_ip):
    """Check the active ONOS applications of a node and store the list.

    Runs ``apps -a -s`` through the ONOS CLI and takes, from every output
    line, the first word of the third dot-separated component as the
    application name.  If ``cpman`` is not among them it is activated with
    ``app activate org.onosproject.cpman``.  Every name in
    ``CONF.onos()['app_list']`` must be active; other active applications
    are recorded with ``monitor_item`` set to ``False``.

    Side effect: stores the list in the ``applist`` column of this node's
    row in the ONOS table (the string ``'fail'`` when the command failed).

    NOTE(review): an earlier function with the same name and a different
    signature is visible in this source; if both live in the same module,
    this definition replaces the earlier one at import time.

    Args:
        conn: database connection.
        db_log: logger whose ``write_log`` records the SQL statement.
        node_name: name of the ONOS node (row key).
        node_ip: address of the ONOS node.

    Returns:
        ``(status, fail_reason)`` with ``status`` one of ``'ok'``, ``'nok'``
        or ``'fail'`` and ``fail_reason`` the reports of missing
        applications.
    """
    # Bound before the try: both are returned even when the very first
    # call raises (this used to end in a NameError).
    status = 'ok'
    fail_reason = []

    try:
        app_rt = SshCommand.onos_ssh_exec(node_ip, 'apps -a -s')

        app_active_list = list()
        app_list = []

        if app_rt is not None:
            for line in app_rt.splitlines():
                app_active_list.append(line.split(".")[2].split()[0])

            if 'cpman' not in app_active_list:
                # activate cpman
                LOG.info('Cpman does not exist. Activate cpman')
                SshCommand.onos_ssh_exec(node_ip, 'app activate org.onosproject.cpman')

            for app in CONF.onos()['app_list']:
                if app in app_active_list:
                    app_json = {'name': app, 'status': 'ok', 'monitor_item': True}
                    # What remains afterwards are the unmonitored apps.
                    app_active_list.remove(app)
                else:
                    status = 'nok'
                    app_json = {'name': app, 'status': 'nok', 'monitor_item': True}
                    fail_reason.append(app_json)

                app_list.append(app_json)

            for app in app_active_list:
                app_list.append({'name': app, 'status': 'ok', 'monitor_item': False})
        else:
            LOG.error("'%s' ONOS Application Check Error", node_ip)
            status = 'fail'
            app_list = 'fail'

        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET applist = "' + str(app_list) + '"' + \
                  " WHERE nodename = '" + node_name + "'"
            db_log.write_log('----- UPDATE ONOS APP INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS APP DB Update Fail.')
        except Exception:
            LOG.exception()
    except Exception:
        LOG.exception()
        status = 'fail'

    return status, fail_reason
def onos_conn_check(conn, db_log, node_name, node_ip):
    """Check ONOS OpenFlow device connections and cluster membership.

    Runs the ONOS CLI commands ``devices`` and ``nodes`` on ``node_ip``,
    matches the output against the switches in ``DB.OPENSTACK_TBL`` and the
    ONOS nodes in ``DB.NODE_INFO_TBL``, and writes both result lists into the
    ``openflow`` and ``cluster`` columns of ``DB.ONOS_TBL`` for ``node_name``.

    :param conn: DB connection used for the SELECTs and ``DB.sql_execute``.
    :param db_log: logger-like object; only ``write_log`` is used.
    :param node_name: value matched against the ``nodename`` column.
    :param node_ip: address the ``devices``/``nodes`` commands run against.
    :return: ``(of_status, cluster_status, of_fail_reason,
        cluster_fail_reason)``. Each status is ``'ok'``, ``'nok'`` or
        ``'fail'``; the reason lists hold the entries that caused ``'nok'``.
    """
    # Bound before the try block so that the final return is always valid,
    # even when one of the first SSH calls raises.
    of_status = 'ok'
    cluster_status = 'ok'
    of_fail_reason = []
    cluster_fail_reason = []

    try:
        device_rt = SshCommand.onos_ssh_exec(node_ip, 'devices')
        nodes_rt = SshCommand.onos_ssh_exec(node_ip, 'nodes')

        of_list = []
        cluster_list = []

        if device_rt is not None:
            try:
                sql = 'SELECT hostname, of_id FROM ' + DB.OPENSTACK_TBL
                nodes_info = conn.cursor().execute(sql).fetchall()

                device_lines = [line for line in device_rt.splitlines()
                                if line.startswith('id=of')]

                # Switch ids known to the DB; devices outside this set are
                # reported below as unmonitored.
                known_switch_ids = set()

                for hostname, switch_id in nodes_info:
                    known_switch_ids.add(switch_id)

                    for line in device_lines:
                        fields = line.split(',')
                        of_id = fields[0].split('=')[1]
                        available = fields[1].split('=')[1]

                        if switch_id == of_id:
                            rest_json = parse_openflow(line, str(hostname), True)
                            if not available == 'true':
                                of_status = 'nok'
                                of_fail_reason.append(rest_json)
                            of_list.append(rest_json)

                for line in device_lines:
                    of_id = line.split(',')[0].split('=')[1]
                    if of_id not in known_switch_ids:
                        of_list.append(parse_openflow(line, '', False))
            except Exception:
                LOG.exception()
                LOG.error("\'%s\' Connection Check Error(devices)", node_ip)
                of_status = 'fail'
        else:
            LOG.error("\'%s\' Connection Check Error(devices)", node_ip)
            of_status = 'fail'

        if nodes_rt is not None:
            try:
                sql = 'SELECT ip_addr FROM ' + DB.NODE_INFO_TBL + ' WHERE type = \'ONOS\''
                nodes_info = conn.cursor().execute(sql).fetchall()

                cluster_ip_list = list()
                # Stays None when no ONOS node is registered, so the
                # post-loop block below is skipped instead of raising.
                summary_rt = None

                for onos_ip in nodes_info:
                    find_flag = False
                    summary_rt = SshCommand.onos_ssh_exec(onos_ip[0], 'summary')

                    if summary_rt is not None:
                        # Value of the first 'key=value' field of `summary`.
                        data_ip = str(summary_rt).split(',')[0].split('=')[1]

                        for line in nodes_rt.splitlines():
                            fields = line.split(',')
                            node_id = fields[0].split('=')[1]
                            address = fields[1].split('=')[1]
                            state = fields[2].split('=')[1].split(' ')[0]

                            if data_ip == address.split(':')[0]:
                                find_flag = True
                                cluster_ip_list.append(address)

                                rest_json = {'id': node_id, 'address': address,
                                             'status': 'ok', 'monitor_item': True}
                                cluster_list.append(rest_json)

                                if not state == 'READY':
                                    cluster_status = 'nok'
                                    cluster_fail_reason.append(rest_json)

                        if not find_flag:
                            rest_json = {'id': data_ip, 'address': '-',
                                         'status': 'nok', 'monitor_item': True}
                            cluster_list.append(rest_json)
                            cluster_status = 'nok'
                            cluster_fail_reason.append(rest_json)
                    else:
                        # Record the node address itself, not the whole DB row.
                        rest_json = {'id': onos_ip[0], 'address': '-',
                                     'status': 'nok', 'monitor_item': True}
                        cluster_list.append(rest_json)

                # Add the cluster members that were not matched above; this
                # only runs when the last `summary` query succeeded.
                if summary_rt is not None:
                    for line in nodes_rt.splitlines():
                        fields = line.split(',')
                        node_id = fields[0].split('=')[1]
                        address = fields[1].split('=')[1]
                        state = fields[2].split('=')[1].split(' ')[0]

                        if not state == 'READY':
                            node_status = 'nok'
                        else:
                            node_status = 'ok'

                        if address not in cluster_ip_list:
                            rest_json = {'id': node_id, 'address': address,
                                         'status': node_status, 'monitor_item': True}
                            cluster_list.append(rest_json)
            except Exception:
                # Report the failure like the devices branch does instead of
                # silently leaving the cluster marked as healthy.
                LOG.exception()
                LOG.error("\'%s\' Connection Check Error(nodes)", node_ip)
                cluster_status = 'fail'
        else:
            LOG.error("\'%s\' Connection Check Error(nodes)", node_ip)
            cluster_status = 'fail'

        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET openflow = \"' + str(of_list) + '\",' + \
                  ' cluster = \"' + str(cluster_list) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE ONOS CONNECTION INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS CONNECTION DB Update Fail.')
        except Exception:
            # A failed DB update is logged but does not change the statuses.
            LOG.exception()
    except Exception:
        LOG.exception()
        of_status = 'fail'
        cluster_status = 'fail'

    return of_status, cluster_status, of_fail_reason, cluster_fail_reason
def get_gw_ratio_gateway(conn, db_log, node_ip, node_name, rx, gw_rx_sum, pre_stat):
    """Compute the gateway traffic ratio for a node and store the result.

    Looks up the ONOS node whose ``nodelist`` contains ``node_ip`` as
    ``management_ip``, sums the ``packets=`` counters of the ONOS ``groups``
    lines that contain ``{tunnelDst=<data_ip>}``, and compares the deltas
    against the counters saved in ``pre_stat`` under ``node_name + '_GW'``.
    The result is written into the ``gw_ratio`` column of
    ``DB.OPENSTACK_TBL`` and the current counters are saved in ``pre_stat``.

    :param conn: DB connection used for the SELECT and ``DB.sql_execute``.
    :param db_log: logger-like object; only ``write_log`` is used.
    :param node_ip: management IP searched for in the ONOS node lists.
    :param node_name: node name used for the ``pre_stat`` key and DB update.
    :param rx: current rx counter of this node.
    :param gw_rx_sum: current rx counter sum the ratio is computed against.
    :param pre_stat: mutable mapping of the previous counters; updated in
        place and also returned.
    :return: ``(status, pre_stat, reason)``. ``status`` is ``'ok'``,
        ``'nok'``, ``'-'`` (no previous sample yet) or ``'fail'``;
        ``reason`` holds the ratio record when ``status`` is not ``'ok'``
        and a record was produced.
    """
    status = 'ok'
    reason = []
    # Bound up front: the post-try block reads it even when the try block
    # fails before a record has been built.
    json_ratio = None

    try:
        sql = 'SELECT ' + DB.ONOS_TBL + '.nodename, nodelist, ip_addr' + ' FROM ' + DB.ONOS_TBL + \
              ' INNER JOIN ' + DB.NODE_INFO_TBL + ' ON ' + DB.ONOS_TBL + '.nodename = ' + \
              DB.NODE_INFO_TBL + '.nodename'
        nodes_info = conn.cursor().execute(sql).fetchall()

        if len(nodes_info) == 0:
            LOG.info('Fail to load onos list')
            return 'fail', pre_stat, reason

        # search data_ip
        data_ip = ''
        manage_ip = ''
        cpt_to_gw_packet = 0

        for nodename, nodelist, ip in nodes_info:
            if not nodelist == 'none':
                # NOTE(review): eval() executes whatever text is stored in the
                # nodelist column; confirm that column is only ever written by
                # trusted code (ast.literal_eval would be the safer parser).
                for node_info in eval(nodelist):
                    try:
                        if dict(node_info)['management_ip'] == node_ip:
                            manage_ip = ip
                            data_ip = dict(node_info)['data_ip']
                    except Exception:
                        manage_ip = ''

                    if not manage_ip == '':
                        break

            if not manage_ip == '':
                break

        if data_ip == '':
            LOG.info('Can not find data ip')
            return 'fail', pre_stat, reason

        group_rt = SshCommand.onos_ssh_exec(manage_ip, 'groups')

        if group_rt is not None:
            tunnel_tag = '{tunnelDst=' + data_ip + '}'
            for line in group_rt.splitlines():
                if tunnel_tag in line:
                    for col in line.split(','):
                        if 'packets=' in col:
                            cpt_to_gw_packet = cpt_to_gw_packet + int(col.split('=')[1])

        gw_key = node_name + '_GW'
        prev_stats = dict(pre_stat)

        if gw_key not in prev_stats:
            # First sample for this node: nothing to diff against yet.
            status = '-'
            json_ratio = {'current_rx': '-', 'current_compute_tx': '-', 'current_total': '-',
                          'ratio': '-', 'period': CONF.watchdog()['interval'], 'status': status,
                          'packet_loss': False, 'description': ''}
        else:
            prev = prev_stats[gw_key]
            cur_rx = rx - int(prev['rx'])
            cur_total = gw_rx_sum - int(prev['gw_rx_sum'])
            cur_packet = cpt_to_gw_packet - int(prev['cpt_to_gw_packet'])

            if cur_rx == 0 and cur_total == 0:
                ratio = 100
            elif cur_rx <= 0 or cur_total < 0:
                ratio = 0
            else:
                ratio = float(cur_rx) * 100 / cur_total

            desc = 'GW RATIO = ' + str(ratio) + ' (' + str(cur_rx) + ' / ' + str(cur_total) + ')'

            loss_flag = False
            if cur_rx < cur_packet:
                LOG.info('GW Ratio Fail. (Data loss)')
                loss_flag = True

            LOG.info('GW Ratio = ' + str(ratio))

            if ratio < float(CONF.alarm()['gw_ratio']) or cur_rx < cur_packet:
                status = 'nok'

            json_ratio = {'current_rx': cur_rx, 'current_compute_tx': cur_packet,
                          'current_total': cur_total, 'ratio': format(ratio, '.2f'),
                          'period': CONF.watchdog()['interval'], 'status': status,
                          'packet_loss': loss_flag, 'description': desc}

        try:
            sql = 'UPDATE ' + DB.OPENSTACK_TBL + \
                  ' SET gw_ratio = \"' + str(json_ratio) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE TRAFFIC GW INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] TRAFFIC GW DB Update Fail.')
        except Exception:
            # A failed DB update is logged but does not change the status.
            LOG.exception()

        # Save the current counters as the baseline for the next call.
        in_out_dic = dict()
        in_out_dic['rx'] = rx
        in_out_dic['gw_rx_sum'] = gw_rx_sum
        in_out_dic['cpt_to_gw_packet'] = cpt_to_gw_packet

        pre_stat[gw_key] = in_out_dic
    except Exception:
        LOG.exception()
        status = 'fail'

    # Only report a record that was actually built; an early failure leaves
    # the reason list empty, as the early 'fail' returns above do.
    if not status == 'ok' and json_ratio is not None:
        reason.append(json_ratio)

    return status, pre_stat, reason
def onos_rest_check(conn, db_log, node_name, node_ip):
    """Check the ONOS web bundles on a node and store the result.

    Runs ``web:list`` through ``SshCommand.onos_ssh_exec`` on ``node_ip``.
    Every entry of ``CONF.onos()['rest_list']`` found in the output is
    recorded as a monitored item, every other listed bundle as an
    unmonitored one; a line counts as ``'ok'`` only when it contains both
    ``Active`` and ``Deployed``. The list is written into the ``weblist``
    column of ``DB.ONOS_TBL`` for ``node_name``.

    :param conn: DB connection handed to ``DB.sql_execute``.
    :param db_log: logger-like object; only ``write_log`` is used.
    :param node_name: value matched against the ``nodename`` column.
    :param node_ip: address the ONOS CLI command is executed against.
    :return: ``(web_status, fail_reason)``. ``web_status`` is ``'ok'``,
        ``'nok'`` or ``'fail'``; ``fail_reason`` lists the monitored entries
        that are not both active and deployed.
    """
    try:
        web_status = 'ok'
        web_list = []
        fail_reason = []

        web_rt = SshCommand.onos_ssh_exec(node_ip, 'web:list')

        if web_rt is None:
            LOG.error("\'%s\' ONOS Rest Check Error", node_ip)
            web_status = 'fail'
            web_list = 'fail'
        else:
            rows = web_rt.splitlines()
            header_prefixes = ('ID', '--')

            # Monitored bundles: one entry per output line mentioning them.
            # NOTE(review): a configured bundle that appears on no line
            # produces no entry at all and leaves the status untouched.
            for web in CONF.onos()['rest_list']:
                for row in rows:
                    if row.startswith(header_prefixes):
                        continue
                    if ' ' + web + ' ' not in row:
                        continue

                    if 'Active' in row and 'Deployed' in row:
                        entry = {'name': web, 'status': 'ok', 'monitor_item': True}
                    else:
                        entry = {'name': web, 'status': 'nok', 'monitor_item': True}
                        fail_reason.append(entry)
                        web_status = 'nok'

                    web_list.append(entry)

            # Remaining bundles: listed by the CLI but not configured.
            for row in rows:
                if row.startswith(header_prefixes):
                    continue

                # Eleventh whitespace-separated column of the line.
                name = row.split()[10]

                if name in CONF.onos()['rest_list']:
                    continue

                if 'Active' in row and 'Deployed' in row:
                    entry = {'name': name, 'status': 'ok', 'monitor_item': False}
                else:
                    entry = {'name': name, 'status': 'nok', 'monitor_item': False}

                web_list.append(entry)

        try:
            sql = 'UPDATE ' + DB.ONOS_TBL + \
                  ' SET weblist = \"' + str(web_list) + '\"' + \
                  ' WHERE nodename = \'' + node_name + '\''
            db_log.write_log('----- UPDATE ONOS REST INFO -----\n' + sql)

            update_rt = DB.sql_execute(sql, conn)
            if update_rt != 'SUCCESS':
                db_log.write_log('[FAIL] ONOS REST DB Update Fail.')
        except:
            # A failed DB update is logged but does not change the status.
            LOG.exception()
    except:
        LOG.exception()
        web_status = 'fail'

    return web_status, fail_reason