def push_event(node_name, item, grade, desc, time):
    """Send an 'occur' event to every receiver stored in DB.REGI_SYS_TBL.

    Each row of the table is unpacked as ``(url, auth)``. The event is POSTed
    to ``url`` as JSON, with ``auth`` as the Authorization header and a
    2 second timeout.

    Args:
        node_name: Sent as the 'system' field.
        item: Sent as the 'item' field.
        grade: Sent as the 'grade' field.
        desc: Sent as the 'desc' field.
        time: Sent as the 'time' field.

    Returns:
        None. A failing POST is logged and the remaining receivers are still
        notified.
    """
    sql = 'SELECT * FROM ' + DB.REGI_SYS_TBL

    with DB.connection() as conn:
        url_list = conn.cursor().execute(sql).fetchall()
    conn.close()

    if not url_list:
        # No receiver registered: nothing to serialize or send.
        return

    # The payload is the same for every receiver, so build it only once.
    req_body_json = json.dumps({
        'event': 'occur',
        'system': node_name,
        'item': item,
        'grade': grade,
        'desc': desc,
        'time': time,
    })

    for url, auth in url_list:
        header = {'Content-Type': 'application/json', 'Authorization': auth}
        try:
            requests.post(url, headers=header, data=req_body_json, timeout=2)
        except Exception:
            # Typically a REST timeout; one dead receiver must not stop
            # delivery to the others.
            LOG.exception()
def regi_url(url, auth):
    """Store ``(url, auth)`` in DB.REGI_SYS_TBL unless the url is already there.

    Args:
        url: Receiver URL used as the lookup key.
        auth: Authorization value stored next to the URL.

    Returns:
        {'Result': 'SUCCESS'} when the url already exists or the insert
        succeeded, {'Result': 'FAIL'} when the insert failed or any error
        occurred (the error is logged).
    """
    try:
        # NOTE(review): url/auth are presumably supplied by a remote caller.
        # Double embedded single quotes so a value cannot terminate the SQL
        # string literal it is placed in.
        safe_url = url.replace("'", "''")
        safe_auth = auth.replace("'", "''")

        sql = 'SELECT * FROM ' + DB.REGI_SYS_TBL + " WHERE url = '" + safe_url + "'"

        with DB.connection() as conn:
            url_info = conn.cursor().execute(sql).fetchall()
        conn.close()

        # Any existing row counts as "already registered"; testing for
        # exactly one row would insert again once duplicates exist.
        if url_info:
            return {'Result': 'SUCCESS'}

        sql = 'INSERT INTO ' + DB.REGI_SYS_TBL + \
              " VALUES ('" + safe_url + "', '" + safe_auth + "' )"

        if DB.sql_execute(sql) == 'SUCCESS':
            return {'Result': 'SUCCESS'}
        return {'Result': 'FAIL'}
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
def get_disk_usage(username, node_ip, only_value=False):
    """Read the root filesystem usage of a node through ``df -h /`` over SSH.

    Args:
        username: Account passed to SshCommand.ssh_exec.
        node_ip: Address passed to SshCommand.ssh_exec.
        only_value: When true, return just the number instead of a dict.

    Returns:
        If the command result is None: -1 (only_value) or
        {'DISK': 'Command fail'}.
        Otherwise the ratio rounded to two decimals (only_value) or
        {'DISK': {'RATIO': <ratio>, 'Description': <raw command output>}}.
        The ratio stays 0.0 when the output cannot be parsed.
    """
    cmd = 'df -h / | grep -v Filesystem'
    cmd_rt = SshCommand.ssh_exec(username, node_ip, cmd)

    if cmd_rt is None:
        LOG.info("%s Disk check Fail", node_ip)
        if only_value:
            return -1
        return {'DISK': 'Command fail'}

    ratio = 0.0
    if '/' in cmd_rt:
        LOG.info("cmd_rt %s", cmd_rt)
        try:
            # The second-to-last whitespace-separated field is taken as the
            # usage percentage (e.g. '42%').
            ratio = float(cmd_rt.split()[-2].replace('%', ''))
        except Exception:
            LOG.exception()

    ratio = float(format(ratio, '.2f'))
    result = {'DISK': {'RATIO': ratio, 'Description': cmd_rt}}
    LOG.info(" Disk check ... %s", result)

    if only_value:
        return ratio
    return result
def unregi_url(url):
    """Delete the row for ``url`` from DB.REGI_SYS_TBL if one exists.

    Args:
        url: Receiver URL used as the lookup key.

    Returns:
        {'Result': 'SUCCESS'} when no row exists or the delete succeeded,
        {'Result': 'FAIL'} when the delete failed or any error occurred
        (the error is logged).
    """
    try:
        # NOTE(review): url is presumably supplied by a remote caller. Double
        # embedded single quotes so it cannot terminate the SQL string
        # literal it is placed in.
        safe_url = url.replace("'", "''")
        where = " WHERE url = '" + safe_url + "'"

        sql = 'SELECT * FROM ' + DB.REGI_SYS_TBL + where

        with DB.connection() as conn:
            url_info = conn.cursor().execute(sql).fetchall()
        conn.close()

        # Nothing registered under this url: treated as success.
        if not url_info:
            return {'Result': 'SUCCESS'}

        sql = 'DELETE FROM ' + DB.REGI_SYS_TBL + where

        if DB.sql_execute(sql) == 'SUCCESS':
            return {'Result': 'SUCCESS'}
        return {'Result': 'FAIL'}
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
def proc_dis_log(node, param):
    """Collect log output from the ONOS nodes selected by ``node``.

    ``param`` picks the command run through SshCommand.onos_ssh_exec:
    'debug', 'info' and 'error' select ``ld -l <LEVEL>``, 'exception' selects
    ``log:exception-display`` and anything else falls back to plain ``ld``.

    Returns:
        A dict mapping node name to the command output, or to 'FAIL' when the
        command returned None. Nodes whose type is not ONOS are left out.
        {'Result': 'FAIL'} is returned when an error occurs (it is logged).
    """
    cmd = 'ld'

    try:
        command_table = (
            ('debug', 'ld -l DEBUG'),
            ('info', 'ld -l INFO'),
            ('error', 'ld -l ERROR'),
            ('exception', 'log:exception-display'),
        )
        for keyword, command in command_table:
            if param == keyword:
                cmd = command
                break

        rows = get_node_list(node, 'nodename, ip_addr, type')

        logs = dict()
        for node_name, ip, node_type in rows:
            if node_type.upper() != 'ONOS':
                continue

            output = SshCommand.onos_ssh_exec(ip, cmd)
            if output is None:
                logs[node_name] = 'FAIL'
            else:
                logs[node_name] = output

        return logs
    except:
        LOG.exception()
        return {'Result': 'FAIL'}
def authentication(self):
    """Check the request's Authorization header against the configured accounts.

    The last whitespace-separated token of the header is base64-decoded and
    compared with every entry of CONF.rest()['user_password'].

    Returns:
        True when an entry matches. False when the header is missing, nothing
        matches, or an error occurs; in the first two cases an explanatory
        line is written to ``self.wfile``.
    """
    try:
        request_auth = self.headers.getheader("authorization")
        if not request_auth:
            self.wfile.write('No Authorization Header\n')
            return False

        id_pw_list = CONF.rest()['user_password']

        # Decode once; the value is reused for the failure log below so a
        # malformed header cannot raise a second time outside this try.
        request_account = None
        try:
            request_account = base64.b64decode(str(request_auth).split()[-1])

            for id_pw in id_pw_list:
                if id_pw.strip() == request_account:
                    # NOTE(review): entries are presumably 'id:password';
                    # only the part before the first ':' is logged so the
                    # password never reaches the log file.
                    LOG.info('[REST-SERVER] AUTH SUCCESS = %s, from %s',
                             str(id_pw).split(':', 1)[0], self.client_address)
                    return True
        except Exception:
            LOG.exception()

        self.wfile.write('Request Authentication User ID or Password is Wrong \n')
        LOG.info('[REST-SERVER] AUTH FAIL = %s, from %s',
                 str(request_account).split(':', 1)[0], self.client_address)
        return False
    except Exception:
        LOG.exception()
        return False
def get_service_list():
    """Fetch the instance names from the XOS REST server with curl.

    The server URL and account come from CONF.xos() ('xos_rest_server' and
    'xos_rest_account'); the request goes to ``<url>/api/core/instances/``.

    Returns:
        A list with the 'instance_name' of every returned instance.
        '' when curl exits with a non-zero status (kept as-is for existing
        callers). On any other error the names collected so far are returned
        and the error is logged.
    """
    service_list = []

    try:
        url = CONF.xos()['xos_rest_server']
        account = CONF.xos()['xos_rest_account']

        # Argument list instead of a shell string: special characters in the
        # configured account or URL can neither break nor extend the command.
        cmd = ['curl', '-H', 'Accept: application/json; indent=4',
               '-u', account, '-X', 'GET', url + '/api/core/instances/']
        result = Popen(cmd, stdout=PIPE, stderr=PIPE)
        output, error = result.communicate()

        if result.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return ''

        for instance_info in json.loads(output):
            name = instance_info['instance_name']
            LOG.info('swarm_instance_name = ' + name)
            service_list.append(name)
    except Exception:
        LOG.exception()

    return service_list
def proc_dis_system(node, dummy):
    """Build a status report for every system type in CONF.watchdog()['check_system'].

    For each type, DB.STATUS_TBL is joined with DB.NODE_INFO_TBL on nodename
    and the columns returned by DB.get_event_list(sys_type) are selected.

    Args:
        node: 'all' or a node name to restrict the report to.
        dummy: Unused.

    Returns:
        A dict mapping node name to {'TYPE': ..., 'IP': ..., <item>: <value>},
        or {'Result': 'FAIL'} when an error occurs (it is logged).
    """
    try:
        result = dict()

        for sys_type in CONF.watchdog()['check_system']:
            event_list = DB.get_event_list(sys_type)

            sql = 'SELECT ' + DB.STATUS_TBL + '.nodename, ' + DB.NODE_INFO_TBL + '.ip_addr, ' + \
                  ", ".join(event_list) + ' FROM ' + DB.STATUS_TBL + \
                  ' INNER JOIN ' + DB.NODE_INFO_TBL + ' ON ' + DB.STATUS_TBL + '.nodename = ' + \
                  DB.NODE_INFO_TBL + ".nodename WHERE type = '" + sys_type + "'"

            if node != 'all':
                # Double embedded single quotes so the caller-supplied node
                # name cannot terminate the SQL string literal.
                sql = sql + ' and ' + DB.NODE_INFO_TBL + ".nodename = '" + \
                      node.replace("'", "''") + "'"

            with DB.connection() as conn:
                nodes_info = conn.cursor().execute(sql).fetchall()
            conn.close()

            for row in nodes_info:
                line = dict()
                line['TYPE'] = sys_type
                line['IP'] = row[1]

                # Columns 0 and 1 are nodename and ip_addr; the event columns
                # follow in the order of event_list.
                for idx, item in enumerate(event_list, 2):
                    line[item] = row[idx]

                result[row[0]] = line

        return result
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
def get_event_list(url, auth):
    """Return every row of DB.EVENT_TBL as a list of event dicts.

    Args:
        url: Unused.
        auth: Unused.

    Returns:
        {'Result': 'SUCCESS', 'Event list': [...]} where each entry carries
        'event' (always 'occur'), 'system', 'item', 'grade', 'pre_grade',
        'reason' and 'time'; {'Result': 'FAIL'} when an error occurs (it is
        logged).
    """
    try:
        sql_evt = 'SELECT * FROM ' + DB.EVENT_TBL

        with DB.connection() as conn:
            evt_list = conn.cursor().execute(sql_evt).fetchall()
        conn.close()

        event_list = []
        for nodename, item, grade, pre_grade, reason, time in evt_list:
            event_list.append({
                'event': 'occur',
                'system': nodename,
                'item': item,
                'grade': grade,
                'pre_grade': pre_grade,
                # Report the stored reason; the placeholder text
                # 'fail_reason' used to be sent for every event instead.
                'reason': reason,
                'time': time,
            })

        return {'Result': 'SUCCESS', 'Event list': event_list}
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
def proc_onos(node, param):
    """Return one stored ONOS attribute for the nodes selected by ``node``.

    Args:
        node: Node selector forwarded to get_node_list.
        param: One of 'cluster', 'device', 'link' or 'app'; names the column
            read from DB.ONOS_TBL.

    Returns:
        A dict mapping node name to the evaluated stored value, or to 'FAIL'
        when the stored value is the string 'none'.
        {'fail': ...} when no row was found.
        {'Result': 'FAIL'} for an unsupported ``param`` or on any error.
    """
    try:
        columns = {
            'cluster': 'nodename, cluster',
            'device': 'nodename, device',
            'link': 'nodename, link',
            'app': 'nodename, app',
        }

        if param not in columns:
            # Handled explicitly; this case used to surface only through an
            # unbound-variable exception.
            LOG.info('proc_onos: unsupported param %s', param)
            return {'Result': 'FAIL'}

        nodes_info = get_node_list(node, columns[param], DB.ONOS_TBL)

        if len(nodes_info) == 0:
            return {
                'fail': 'dis-conn: This is not a command on the target system.'
            }

        res_result = dict()
        for nodename, value in nodes_info:
            if value == 'none':
                res_result[nodename] = 'FAIL'
            else:
                # NOTE(review): eval() executes whatever text is stored in the
                # table. Consider ast.literal_eval if the column only ever
                # holds Python literals -- confirm against the writers.
                res_result[nodename] = eval(value)

        return res_result
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
def onos_api_req(node_ip, url_path):
    """Issue a GET against the ONOS REST API of one node.

    Port, credentials ('user:password') and timeout are read from CONF.onos().

    Returns:
        (status_code, body) with the decoded JSON body on success,
        (-1, None) when the request itself raised,
        (-2, None) when the status is not 200 or the body cannot be decoded.
    """
    try:
        endpoint = "http://%s:%d/%s" % (node_ip, CONF.onos()['api_port'], url_path)
        credentials = CONF.onos()['api_user_passwd'].split(':')
        wait_sec = CONF.onos()['api_timeout_sec']

        response = requests.get(endpoint,
                                auth=(credentials[0], credentials[1]),
                                timeout=wait_sec)
    except:
        # e.g. request timeout
        LOG.exception()
        return -1, None

    if response.status_code == 200:
        try:
            # Single quotes are turned into double quotes before decoding.
            decoded = json.loads(response.content.replace("\'", '"'))
            return response.status_code, decoded
        except:
            LOG.exception()

    return -2, None
def get_mem_usage(username, node_ip, only_value=False):
    """Read the memory usage of a node through ``free -t -m`` over SSH.

    The ratio is field 2 divided by field 1 of the 'Mem' line, times 100.

    Returns:
        If the command result is None: -1 (only_value) or
        {'MEMORY': 'Command fail'}.
        Otherwise the ratio rounded to two decimals (only_value) or
        {'MEMORY': {'RATIO': <ratio>, 'Description': <raw command output>}}.
        The ratio stays 0.0 when the output cannot be parsed.
    """
    cmd_rt = SshCommand.ssh_exec(username, node_ip, 'free -t -m | grep Mem')

    if cmd_rt is None:
        LOG.info("%s Memory check Fail", node_ip)
        return -1 if only_value else {'MEMORY': 'Command fail'}

    ratio = float()
    if 'Mem' in cmd_rt:
        LOG.info("cmd_rt %s", cmd_rt)
        try:
            fields = cmd_rt.split()
            ratio = float(fields[2]) * 100 / float(fields[1])
        except:
            LOG.exception()

    report = {
        'MEMORY': {
            'RATIO': float(format(ratio, '.2f')),
            'Description': cmd_rt,
        }
    }
    LOG.info(" Memory check ... %s", report)

    return float(format(ratio, '.2f')) if only_value else report
def get_cpu_usage(username, node_ip, only_value=False):
    """Read the CPU usage of a node from the 'cpu ' line of /proc/stat over SSH.

    The ratio is (f1 + f3) * 100 / (f1 + f3 + f4), where fN is the N-th
    whitespace-separated field of that line.

    Returns:
        If the command result is None: -1 (only_value) or
        {'CPU': 'Command fail'}.
        Otherwise the ratio rounded to two decimals (only_value) or
        {'CPU': {'RATIO': <ratio>, 'Description': <raw command output>}}.
        The ratio stays 0.0 when the output cannot be parsed.
    """
    # The backslash is escaped explicitly: '\ ' is not a valid Python escape
    # sequence. The command sent to the node is unchanged.
    cmd = "grep 'cpu\\ ' /proc/stat"
    cmd_rt = SshCommand.ssh_exec(username, node_ip, cmd)

    if cmd_rt is None:
        LOG.info("%s CPU check Fail", node_ip)
        if only_value:
            return -1
        return {'CPU': 'Command fail'}

    ratio = 0.0
    if 'cpu ' in cmd_rt:
        LOG.info("cmd_rt: %s", cmd_rt)
        try:
            f = cmd_rt.split()
            busy = float(f[1]) + float(f[3])
            ratio = busy * 100 / (busy + float(f[4]))
        except Exception:
            LOG.exception()

    ratio = float(format(ratio, '.2f'))
    result = {'CPU': {'RATIO': ratio, 'Description': cmd_rt}}
    LOG.info(" CPU check ... %s", result)

    if only_value:
        return ratio
    return result
def get_content(self):
    """Read the request body and decode it as JSON.

    Returns:
        The decoded data on success. False when the content-length header is
        missing/empty or the body is not valid JSON; in both cases a 400
        header and a failure description are sent back.
    """
    length_header = self.headers.getheader('content-length')

    if not length_header:
        self.do_HEAD(400)
        reply = {
            "result": "FAIL",
            "fail_reason": "Bad Request, Content Length is 0\n"
        }
        self.wfile.write(str(reply))
        LOG.info('[Data Check] Received No Data from %s', self.client_address)
        return False

    try:
        raw_body = self.rfile.read(int(length_header))
        receive_data = json.loads(raw_body)
        pretty = json.dumps(receive_data, sort_keys=True, indent=4)
        LOG.info('%s', '[Received Data] \n' + pretty)
        return receive_data
    except:
        LOG.exception()

        error_reason = 'Json Data Parsing Error\n'
        self.do_HEAD(400)
        reply = {"result": "FAIL", "fail_reason": error_reason}
        self.wfile.write(str(reply))
        LOG.info('[Check Content] %s', error_reason)
        return False
def run():
    """Start the REST server and serve requests until it stops.

    The listening port is CONF.rest()['rest_server_port'] on all interfaces.
    Any failure is reported on stdout and logged.
    """
    try:
        port = int(CONF.rest()['rest_server_port'])
        server = HTTPServer(("", port), RestHandler)
        server.serve_forever()
    except:
        print('Rest Server failed to start')
        LOG.exception()
def xos_status_check(conn, db_log, node_name):
    """Query the XOS REST server for backend status and store it in DB.XOS_TBL.

    Each returned entry's 'backend_status' is read as '<code> - <description>';
    code '0' means 'ok', anything else 'nok'.

    Args:
        conn: DB connection handed to DB.sql_execute.
        db_log: Logger receiving the generated SQL and update failures.
        node_name: Node whose row in DB.XOS_TBL is updated.

    Returns:
        (status, fail_reason): status is 'ok', 'nok' or 'fail'; fail_reason
        lists the entries whose status is 'nok'. ('fail', None) is returned
        when curl exits with a non-zero status.
    """
    xos_status = 'ok'
    xos_list = []
    fail_reason = []

    try:
        url = CONF.xos()['xos_rest_server']
        account = CONF.xos()['xos_rest_account']

        # Argument list instead of a shell string: special characters in the
        # configured account or URL can neither break nor extend the command.
        cmd = ['curl', '-H', 'Accept: application/json; indent=4',
               '-u', account, '-X', 'GET', url + '/api/core/xoses/']
        result = Popen(cmd, stdout=PIPE, stderr=PIPE)
        output, error = result.communicate()

        if result.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return 'fail', None

        for xos_info in json.loads(output):
            backend_status = xos_info['backend_status']
            LOG.info('xos_status_backend_status = ' + backend_status)

            # Split on the first '-' only, so a description that itself
            # contains a hyphen is kept whole.
            tmp = str(backend_status).split('-', 1)

            if tmp[0].strip() == '0':
                status = 'ok'
            else:
                status = 'nok'

            xos_json = {
                'name': xos_info['name'],
                'status': status,
                'description': tmp[1].strip()
            }
            xos_list.append(xos_json)

            if status == 'nok':
                xos_status = 'nok'
                fail_reason.append(xos_json)

        try:
            # Embedded double quotes are doubled so they cannot end the
            # double-quoted SQL value early.
            sql = 'UPDATE ' + DB.XOS_TBL + \
                  ' SET xos_status = "' + str(xos_list).replace('"', '""') + '"' + \
                  " WHERE nodename = '" + node_name + "'"
            db_log.write_log('----- UPDATE XOS STATUS INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] XOS STATUS DB Update Fail.')
        except Exception:
            LOG.exception()
    except Exception:
        LOG.exception()
        xos_status = 'fail'

    return xos_status, fail_reason
def rx_tx_check(user_name, node_ip):
    """Read packet counters of the vxlan and patch-intg ports of br-int.

    The port numbers are taken from ``ovs-ofctl show br-int``; the counters
    from ``ovs-ofctl dump-ports br-int <port>``, where output line 1 is read
    as the rx line and line 2 as the tx line. '?' counters are read as 0.

    Args:
        user_name: Account passed to SshCommand.ssh_exec.
        node_ip: Address passed to SshCommand.ssh_exec.

    Returns:
        (rx_packets, tx_packets, err_dict, patch_tx_packets). err_dict holds
        'rx_drop', 'rx_err', 'tx_drop' and 'tx_err' of the vxlan port.
        Counters that could not be read are -1; on any failure the result is
        (-1, -1, err_dict, -1) with whatever err_dict held at that point.
    """
    def _counter(fields, idx):
        # Each field looks like '<text>=<number>'.
        return int(fields[idx].split('=')[1])

    # Bound before the try so the failure return below can always use it,
    # even when the very first SSH call raises.
    err_dict = dict()

    try:
        port_rt = SshCommand.ssh_exec(user_name, node_ip,
                                      'sudo ovs-ofctl show br-int')

        if port_rt is not None:
            vxlan_port = None
            patch_port = None

            for line in port_rt.splitlines():
                if '(vxlan)' in line:
                    vxlan_port = line.split('(')[0].strip()
                elif '(patch-intg)' in line:
                    patch_port = line.split('(')[0].strip()

            if vxlan_port is None:
                # Without a vxlan port nothing can be measured; report the
                # same failure tuple as any other error.
                LOG.info("%s vxlan port not found on br-int", node_ip)
                return -1, -1, err_dict, -1

            rx_packet_cnt = -1
            tx_packet_cnt = -1

            port_rt = SshCommand.ssh_exec(
                user_name, node_ip,
                'sudo ovs-ofctl dump-ports br-int ' + vxlan_port)

            if port_rt is not None:
                lines = port_rt.splitlines()
                rx_line = lines[1].replace('?', '0')
                tx_line = lines[2].replace('?', '0')

                tmp = rx_line.split(',')
                rx_packet_cnt = _counter(tmp, 0)
                err_dict['rx_drop'] = _counter(tmp, 2)
                err_dict['rx_err'] = _counter(tmp, 3)

                tmp = tx_line.split(',')
                tx_packet_cnt = _counter(tmp, 0)
                err_dict['tx_drop'] = _counter(tmp, 2)
                err_dict['tx_err'] = _counter(tmp, 3)

            patch_tx_packet_cnt = -1

            # The patch port is optional; only its tx packet count is read.
            if patch_port is not None:
                port_rt = SshCommand.ssh_exec(
                    user_name, node_ip,
                    'sudo ovs-ofctl dump-ports br-int ' + patch_port)

                if port_rt is not None:
                    tx_line = port_rt.splitlines()[2].replace('?', '0')
                    patch_tx_packet_cnt = _counter(tx_line.split(','), 0)

            return rx_packet_cnt, tx_packet_cnt, err_dict, patch_tx_packet_cnt
    except Exception:
        LOG.exception()

    return -1, -1, err_dict, -1
def exist_command(req):
    """Tell whether ``req['command']`` is a key of COMMAND_MAP.

    Returns:
        True when the command is known, False when it is unknown or the
        lookup raises (e.g. 'command' is missing); the error is logged.
    """
    try:
        # Direct membership test; no intermediate key list is built.
        return req['command'] in COMMAND_MAP
    except Exception:
        LOG.exception()
        return False
def onos_ha_check(conn, db_log):
    """Fetch the HA proxy statistics with curl and store them in DB.HA_TBL.

    The output is parsed as CSV after stripping leading '#' and space
    characters. Rows whose 'pxname' is 'stats' or whose 'svname' is 'BACKEND'
    are skipped; the rest are grouped by 'pxname'.

    Args:
        conn: DB connection handed to DB.sql_execute.
        db_log: Logger receiving the generated SQL and update failures.

    Returns:
        A dict mapping 'pxname' to a list of
        {'name', 'req_count', 'succ_count', 'node_sts'} dicts, or None when
        curl fails or any error occurs (it is logged).
    """
    try:
        stats_url = CONF.ha()['ha_proxy_server']
        account = CONF.ha()['ha_proxy_account']

        # Argument list instead of a shell string: special characters in the
        # configured account or URL can neither break nor extend the command.
        cmd = ['curl', '--user', account,
               '--header',
               'Accept: text/html, application/xhtml+xml, image/jxr, */*',
               stats_url]
        result = Popen(cmd, stdout=PIPE, stderr=PIPE)
        output, error = result.communicate()

        if result.returncode != 0:
            LOG.error("Cmd Fail, cause => %s", error)
            return None

        report_data = csv.DictReader(output.lstrip('# ').splitlines())

        dic_stat = dict()
        for row in report_data:
            if row['pxname'].strip() == 'stats' or row['svname'].strip() == 'BACKEND':
                continue

            dtl_list = {
                'name': row['svname'],
                'req_count': row['stot'],
                'succ_count': row['hrsp_2xx'],
                'node_sts': row['status']
            }
            dic_stat.setdefault(row['pxname'], []).append(dtl_list)

        try:
            # Embedded double quotes are doubled so they cannot end the
            # double-quoted SQL value early.
            str_dic_stat = str(dic_stat).replace('"', '""')
            sql = 'UPDATE ' + DB.HA_TBL + \
                  ' SET stats = "' + str_dic_stat + '"' + \
                  ' WHERE ha_key = "' + 'HA' + '"'
            db_log.write_log('----- UPDATE HA INFO -----\n' + sql)

            if DB.sql_execute(sql, conn) != 'SUCCESS':
                db_log.write_log('[FAIL] HA DB Update Fail.')
        except Exception:
            LOG.exception()

        return dic_stat
    except Exception:
        LOG.exception()
        return None
def is_monitor_item(node_type, item_type):
    """Tell whether ``item_type`` should be monitored for ``node_type``.

    The configuration is obtained from CONF_MAP[node_type.upper()](). The item
    is switched off when ``item_type in entry`` holds for any entry of its
    'alarm_off_list'.

    Returns:
        False when the item is switched off; True otherwise, including when
        the configuration has no 'alarm_off_list' or the lookup raises (the
        error is logged).
    """
    try:
        # Fetch the configuration once and reuse it for the lookup.
        conf_dict = CONF_MAP[node_type.upper()]()

        if 'alarm_off_list' in conf_dict:
            if any(item_type in item for item in conf_dict['alarm_off_list']):
                return False
    except Exception:
        LOG.exception()

    return True
def process_event(conn, db_log, node_name, type, id, pre_value, cur_value, reason):
    """Raise an event when a monitored item changed its value.

    Returns:
        '-' when is_monitor_item(type, id) is false, otherwise ``cur_value``
        (after calling occur_event if it differs from ``pre_value``).
        None when an error occurs (it is logged).
    """
    try:
        monitored = is_monitor_item(type, id)

        if monitored:
            if pre_value != cur_value:
                occur_event(conn, db_log, node_name, id,
                            pre_value, cur_value, reason)
            return cur_value

        return '-'
    except:
        LOG.exception()
def push_event(node_name, item, grade, pre_grade, reason, time, flush_alarm):
    """Record a grade change, notify registered receivers and queue an alarm.

    The change is written to ``history_log``, POSTed as JSON to every
    ``(url, auth)`` row of DB.REGI_SYS_TBL (2 second timeout) and queued on
    ALARM with the subject '<node_name> <item> <GRADE>'.

    Args:
        node_name: Node the event belongs to.
        item: Monitored item name.
        grade: New grade.
        pre_grade: Previous grade.
        reason: Failure reason; a list is rendered one entry per line.
        time: Event time string.
        flush_alarm: When true, ALARM.flush_pending_alarm() is called after
            queueing.

    Returns:
        None. Any error is logged.
    """
    global history_log

    try:
        history_log.write_log('[%s][%s][%s->%s] %s', node_name, item, pre_grade, grade, reason)

        sql = 'SELECT * FROM ' + DB.REGI_SYS_TBL

        with DB.connection() as conn:
            url_list = conn.cursor().execute(sql).fetchall()
        conn.close()

        if url_list:
            # The payload is the same for every receiver: build it once.
            req_body_json = json.dumps({
                'system': node_name,
                'item': item,
                'grade': grade,
                'pre_grade': pre_grade,
                'reason': reason,
                'time': time,
            })

            for url, auth in url_list:
                header = {
                    'Content-Type': 'application/json',
                    'Authorization': str(auth)
                }
                try:
                    requests.post(str(url), headers=header, data=req_body_json, timeout=2)
                except Exception:
                    # Deliberately best-effort: a receiver that does not
                    # respond must not block the alarm below.
                    pass

        reason_str = ''
        if isinstance(reason, list):
            if len(reason) > 0:
                # Entries are formatted individually because they are not
                # always strings; joining them directly would raise and the
                # alarm would be lost.
                reason_str = '-- ' + '\n-- '.join('%s' % (entry,) for entry in reason)
        else:
            reason_str = str(reason)

        ALARM.queue_alarm(node_name + ' ' + item + ' ' + grade.upper(), reason_str, time)

        if flush_alarm:
            ALARM.flush_pending_alarm()
    except Exception:
        LOG.exception()
def get_ha_stats(ha_dic):
    """Derive the HA proxy health and success ratio from collected statistics.

    A 'FRONTEND' entry is healthy when its status contains 'OPEN'; every
    other entry when its status contains 'UP'. The ratio is the summed
    'succ_count' of the non-frontend entries times 100, divided by the
    frontend 'req_count', and is compared with CONF.alarm()['ha_proxy'].

    Returns:
        (ha_status, ha_ratio, list_reason, ratio_reason). Both states are
        'ok' or 'nok', or 'fail' when an error occurs (it is logged).
        list_reason holds one dict per unhealthy entry; ratio_reason holds
        the ratio as text when it is below the threshold.
    """
    ha_status = 'ok'
    ha_ratio = 'ok'
    list_reason = []
    ratio_reason = []

    try:
        frontend = 0
        backend = 0

        for key in dict(ha_dic).keys():
            for line in ha_dic[key]:
                entry = dict(line)
                host = entry['name']
                status = entry['node_sts']

                is_frontend = (host == 'FRONTEND')
                expected = 'OPEN' if is_frontend else 'UP'

                if expected not in status:
                    list_reason.append({
                        'key': key,
                        'hostname': host,
                        'status': 'nok'
                    })
                    ha_status = 'nok'

                if is_frontend:
                    frontend = int(entry['req_count'])
                else:
                    backend += int(entry['succ_count'])

        ratio = float(backend) * 100 / frontend

        if ratio < float(CONF.alarm()['ha_proxy']):
            ha_ratio = 'nok'
            ratio_reason.append(str(format(ratio, '.2f')))
    except:
        LOG.exception()
        ha_status = 'fail'
        ha_ratio = 'fail'

    return ha_status, ha_ratio, list_reason, ratio_reason
def get_node_list(nodes, param, tbl=DB.NODE_INFO_TBL):
    """Select columns for one node, or for all nodes, from a table.

    Args:
        nodes: 'all' for every row, otherwise the nodename to match.
        param: Column list placed verbatim after SELECT.
        tbl: Table name; defaults to DB.NODE_INFO_TBL.

    Returns:
        The fetched rows, or None when an error occurs (it is logged).
    """
    try:
        sql = 'SELECT ' + param + ' FROM ' + tbl

        if nodes != 'all':
            # Double embedded single quotes so the node name cannot terminate
            # the SQL string literal it is placed in.
            sql += " WHERE nodename = '" + nodes.replace("'", "''") + "'"

        with DB.connection() as conn:
            nodes_info = conn.cursor().execute(sql).fetchall()
        conn.close()

        return nodes_info
    except Exception:
        LOG.exception()
        return None
def proc_dis_ha(dummy, param):
    """Return the HA statistics stored under ha_key 'HA' in DB.HA_TBL.

    The stored text has its single quotes replaced by double quotes and is
    then decoded as JSON.

    Args:
        dummy: Unused.
        param: Unused.

    Returns:
        The decoded statistics, {'HA': 'FAIL'} when no row is stored, or
        {'Result': 'FAIL'} when an error occurs (it is logged).
    """
    try:
        sql = 'SELECT stats FROM ' + DB.HA_TBL + " WHERE ha_key = 'HA'"

        with DB.connection() as conn:
            row = conn.cursor().execute(sql).fetchone()
        conn.close()

        # fetchone() yields None when nothing matches; test it explicitly so
        # the 'HA' fallback is actually reachable.
        if not row:
            return {'HA': 'FAIL'}

        return json.loads(str(row[0]).replace('\'', '\"'))
    except Exception:
        LOG.exception()
        return {'Result': 'FAIL'}
def send_response_traffic_test_old(cond, auth):
    """Run the old traffic test and POST its result to ``cond['app_rest_url']``.

    The result body carries 'result' ('SUCCESS' or 'FAIL'),
    'traffic_test_result' (when the test returned something) and
    'transaction_id'. It is sent with curl, authenticated with
    CONF.onos()['rest_auth'].

    Args:
        cond: Test condition; must contain 'transaction_id' and
            'app_rest_url'.
        auth: Unused. The previous code stripped a 'Basic ' prefix from it
            but never used the result.

    Returns:
        None. Errors are logged.
    """
    trace_result_data = {}

    try:
        is_success, result = trace.traffic_test_old(cond)

        trace_result_data['result'] = 'SUCCESS' if is_success else 'FAIL'

        if result is not None:
            trace_result_data['traffic_test_result'] = result

        trace_result_data['transaction_id'] = cond['transaction_id']

        try:
            LOG.info('%s', json.dumps(trace_result_data, sort_keys=True, indent=4))
        except Exception:
            # Pretty-printing is only for the log; never let it stop the push.
            pass

        req_body_json = json.dumps(trace_result_data)

        try:
            url = str(cond['app_rest_url'])

            # Argument list instead of a shell string: quotes in the JSON
            # body or in the URL can neither break nor extend the command.
            # '--url' keeps a URL starting with '-' from being read as an
            # option.
            head = ['curl', '-X', 'POST', '-u']
            tail = ['-H', 'Content-Type: application/json',
                    '-d', str(req_body_json), '--url', url]

            # The credentials are masked in the logged command line.
            LOG.error('%s', 'curl = ' + ' '.join(head + ['****'] + tail))

            push = Popen(head + [CONF.onos()['rest_auth']] + tail,
                         stdout=PIPE, stderr=PIPE)
            # A non-zero exit status (receiver not responding) is ignored on
            # purpose.
            push.communicate()
        except Exception:
            LOG.exception()
    except Exception:
        LOG.exception()
def occur_event(conn, db_log, node_name, item, pre_grade, cur_grade, reason):
    """Store a grade change in DB.EVENT_TBL and push the event.

    The row matching ``node_name`` and ``item`` gets the new grade, previous
    grade, reason text and current time; push_event is then called with
    flush_alarm=False.

    Args:
        conn: DB connection handed to DB.sql_execute.
        db_log: Logger receiving the generated SQL and update failures.
        node_name: Node the event belongs to.
        item: Monitored item name.
        pre_grade: Previous grade.
        cur_grade: New grade.
        reason: Failure reason; stored as ``str(reason)``.

    Returns:
        None. Any error is logged.
    """
    try:
        time = str(datetime.now())

        # Embedded double quotes are doubled so the reason text cannot end
        # the double-quoted SQL value early.
        reason_sql = str(reason).replace('"', '""')

        sql = 'UPDATE ' + DB.EVENT_TBL + \
              " SET grade = '" + cur_grade + "'" + ',' + \
              " pre_grade = '" + pre_grade + "'" + ',' + \
              ' reason = "' + reason_sql + '"' + ',' + \
              " time = '" + time + "'" + \
              " WHERE nodename = '" + node_name + "' and item = '" + item + "'"
        db_log.write_log('----- UPDATE EVENT INFO -----\n' + sql)

        if DB.sql_execute(sql, conn) != 'SUCCESS':
            db_log.write_log('[FAIL] EVENT INFO DB Update Fail.')

        push_event(node_name, item, cur_grade, pre_grade, reason, time, False)
    except Exception:
        LOG.exception()
def proc_dis_swarm_sync(node, param):
    """Return the stored 'synchronizer' value of the selected nodes.

    Rows are read from DB.XOS_TBL through get_node_list; ``param`` is unused.

    Returns:
        A dict mapping node name to the evaluated stored value, or to 'FAIL'
        when the stored value is 'fail' or 'none'.
        {'fail': ...} when no row was found, {'Result': 'FAIL'} on any error
        (it is logged).
    """
    try:
        rows = get_node_list(node, 'nodename, synchronizer', DB.XOS_TBL)

        if len(rows) == 0:
            return {'fail': 'This is not a command on the target system.'}

        report = dict()
        for nodename, xos_list in rows:
            if xos_list in ('fail', 'none'):
                report[nodename] = 'FAIL'
                continue

            # NOTE(review): eval() executes whatever text is stored in the
            # table -- confirm that only trusted code writes this column.
            report[nodename] = eval(xos_list)

        return report
    except:
        LOG.exception()
        return {'Result': 'FAIL'}
def exit(self):
    """Stop the daemon by sending SIGTERM to the process group in PIDFILE.

    When the process group no longer exists, the file at ``self.pidfile`` is
    removed if present. All errors are logged; nothing is returned.
    """
    import errno

    try:
        # 'with' closes the file even when its content is not a number.
        with open(PIDFILE, 'r') as pf:
            pid = int(pf.read().strip())

        LOG.info("--- Daemon STOP [fail to check rest server] ---")

        try:
            LOG.info('PID = ' + str(pid))
            os.killpg(pid, SIGTERM)
        except OSError as err:
            # Check the error code rather than the message text.
            if err.errno == errno.ESRCH:
                # NOTE(review): the pid is read from PIDFILE but the file
                # removed is self.pidfile -- confirm both name the same path.
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                # Any other failure (e.g. missing permission) was silently
                # dropped before; record it.
                LOG.exception()
    except Exception:
        LOG.exception()
def proc_dis_connection(node, param):
    """Return the stored value of column ``param`` for the selected ONOS nodes.

    Rows are read from DB.ONOS_TBL through get_node_list; ``param`` is placed
    verbatim in the column list.

    Returns:
        A dict mapping node name to the evaluated stored value, or to 'FAIL'
        when the stored value is 'none'.
        {'fail': ...} when no row was found, {'Result': 'FAIL'} on any error
        (it is logged).
    """
    try:
        rows = get_node_list(node, 'nodename, ' + param, DB.ONOS_TBL)

        if len(rows) == 0:
            return {'fail': 'This is not a command on the target system.'}

        report = dict()
        for nodename, value in rows:
            if value != 'none':
                # NOTE(review): eval() executes whatever text is stored in
                # the table -- confirm that only trusted code writes it.
                report[nodename] = eval(value)
            else:
                report[nodename] = 'FAIL'

        return report
    except:
        LOG.exception()
        return {'Result': 'FAIL'}