def update_config_from_remote():
    global gconfig, alarm_proxy_host, mypublic_ip_port
    url = 'http://' + alarm_proxy_host
    url = urljoin(url, '/api/config/')
    my_headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
        'Content-Type': 'application/json;charset=UTF-8',
    }
    config = {}
    try:
        res = requests.get(url, headers=my_headers, timeout=5)
        if res.status_code == 200:
            if res.json().get('status') == 0:
                slog.info("get remote config ok, response: {0}".format(res.text))
                config = res.json().get('config')
    except Exception as e:
        slog.warn("exception: {0}".format(e))
        return False

    if not config:
        slog.warn("get remote config fail")
        return False
    if dict_cmp(config, gconfig):
        slog.info("get remote config same as default, no need to update")
        return False

    # TODO(smaug) do some sanity checking on config
    gconfig = copy.deepcopy(config)
    slog.info('get remote config ok: {0}'.format(json.dumps(gconfig)))
    return True
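# Note: dict_cmp() is referenced above but not defined in this section. A minimal
# sketch of what it is assumed to do (key-order-insensitive comparison of two
# nested config dicts); the real helper may differ.
import json

def dict_cmp(a, b):
    """Return True if the two (possibly nested) dicts hold the same content."""
    try:
        return json.dumps(a, sort_keys=True) == json.dumps(b, sort_keys=True)
    except (TypeError, ValueError):
        return a == b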
def networksize_alarm(self, content):
    if not content:
        return False
    node_id = content.get('node_id')
    network_id = node_id[:17]  # head 8 * 2 bytes
    # attention: specially for kroot_id 010000
    if network_id.startswith('010000'):
        network_id = '010000'

    node_id_status = content.get('node_id_status')
    if node_id_status == 'remove':
        if network_id not in self.network_ids_:
            slog.warn('remove node_id:{0} from nonexistent network_id:{1}'.format(
                node_id, network_id))
            return False
        for ni in self.network_ids_[network_id]['node_info']:
            if ni.get('node_id') == node_id:
                self.network_ids_[network_id]['node_info'].remove(ni)
                self.network_ids_[network_id]['size'] -= 1
                slog.info('remove node_id:{0} from network_id:{1}, now size:{2}'.format(
                    node_id, network_id, self.network_ids_[network_id]['size']))
                break
        return True

    if network_id not in self.network_ids_:
        network_info = {
            'node_info': [{
                'node_id': node_id,
                'node_ip': content.get('node_ip')
            }],
            'size': 1,
        }
        self.network_ids_[network_id] = network_info
        slog.info('add node_id:{0} to network_id:{1}, new network_id and now size is 1'.format(
            node_id, network_id))
        return True
    else:
        for ni in self.network_ids_[network_id]['node_info']:
            if ni.get('node_id') == node_id:
                #slog.debug('already exist node_id:{0} in network_id:{1}'.format(node_id, network_id))
                return True
        self.network_ids_[network_id]['node_info'].append({
            'node_id': node_id,
            'node_ip': content.get('node_ip')
        })
        self.network_ids_[network_id]['size'] += 1
        slog.info('add node_id:{0} to network_id:{1}, now size is {2}'.format(
            node_id, network_id, self.network_ids_[network_id]['size']))
        return True
def connect(self):
    try:
        mypool = redis.ConnectionPool(host=self.host_, port=self.port_,
                                      password=self.password_, decode_responses=True)
        self.myredis_ = redis.StrictRedis(connection_pool=mypool)
    except Exception as e:
        slog.warn('connect redis host:{0} port:{1} failed'.format(self.host_, self.port_))
        self.myredis_ = None
    finally:
        return self.myredis_
def run(alarm_type, alarm_env='test'):
    global mq
    all_queue_key = mq.get_all_queue_keys()  # set of queue_key
    qkey_map = {
        'packet': [],
        'networksize': [],
        'system': [],
    }
    for qkey in all_queue_key:
        if qkey.find('packet') != -1:
            qkey_map['packet'].append(qkey)
        elif qkey.find('networksize') != -1:
            qkey_map['networksize'].append(qkey)
        elif qkey.find('system') != -1:
            qkey_map['system'].append(qkey)
    slog.warn('qkey_map:{0}'.format(json.dumps(qkey_map)))

    consumer_list = []
    if alarm_type == 'packet' or alarm_type == 'all':
        # packet
        for qkey in qkey_map.get('packet'):
            slog.warn('create consumer for packet, assign queue_key:{0}'.format(qkey))
            consumer = packet_consumer.PacketAlarmConsumer(
                q=mq, queue_key_list=[qkey], alarm_env=alarm_env)
            consumer_list.append(consumer)
    if alarm_type == 'networksize' or alarm_type == 'all':
        qkey = qkey_map.get('networksize')
        qkey.extend(qkey_map.get('system'))
        slog.warn('create consumer for networksize/system, assign queue_key:{0}'.format(
            json.dumps(list(qkey))))
        consumer = networksize_consumer.NetworkSizeAlarmConsumer(
            q=mq, queue_key_list=list(qkey), alarm_env=alarm_env)
        consumer_list.append(consumer)
    # TODO(smaug) add other type here in the future

    if not consumer_list:
        slog.warn("no consumer created")
        return
    print(consumer_list)

    process_list = []
    for c in consumer_list:
        p = Process(target=c.run)
        process_list.append(p)
    slog.warn('{0} consumer started, ==== start'.format(len(consumer_list)))
    for p in process_list:
        p.start()
    for p in process_list:
        p.join()
    return
def put_alarmq_high(alarm_payload):
    global ALARMQ_HIGH
    try:
        ALARMQ_HIGH.put(alarm_payload, block=True, timeout=2)
        slog.info("put alarm_queue_high:{0} size:{1} item:{2}".format(
            ALARMQ_HIGH, ALARMQ_HIGH.qsize(), json.dumps(alarm_payload)))
    except Exception as e:
        slog.warn("queue full, drop alarm_payload")
        return False
    return True
def put_alarmq(alarm_payload):
    global ALARMQ
    try:
        ALARMQ.put(alarm_payload, block=True, timeout=2)
        slog.info("put send_queue:{0} size:{1}, item:{2}".format(
            ALARMQ, ALARMQ.qsize(), json.dumps(alarm_payload)))
    except Exception as e:
        slog.warn("queue full, drop alarm_payload")
        return False
    return True
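# Note: ALARMQ and ALARMQ_HIGH are module-level queues used by the two helpers
# above but not defined in this section. A minimal sketch of how they are
# assumed to be created (bounded thread-safe queues); the actual construction
# and sizes may differ.
import queue

ALARMQ = queue.Queue(maxsize=100000)      # normal-priority alarms
ALARMQ_HIGH = queue.Queue(maxsize=10000)  # high-priority alarms (e.g. node down)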
def get_node_ip(self, node_id):
    network_id = node_id[:17]  # head 8 * 2 bytes
    if network_id.startswith('010000'):
        network_id = '010000'
    if network_id not in self.network_ids_:
        return ''
    for ni in self.network_ids_[network_id]['node_info']:
        if ni.get('node_id') == node_id:
            return ni.get('node_ip')
    slog.warn('get no node_ip of node_id:{0}'.format(node_id))
    return ''
def get_node_ip(self, node_id):
    # head 3 + 1 + 1 + 1 = 6 bytes, that is 6 * 2 = 12 chars of prefix
    network_id = node_id[:12]
    if network_id.startswith('ffffff'):
        network_id = 'ffffff'
    if network_id not in self.network_ids_:
        return ''
    for ni in self.network_ids_[network_id]['node_info']:
        if ni.get('node_id') == node_id:
            return ni.get('node_ip')
    slog.warn('get no node_ip of node_id:{0}'.format(node_id))
    return ''
def watchlog(filename, offset=0):
    try:
        #log_handle = open(filename, 'r', encoding="utf-8", errors='replace')
        log_handle = open(filename, 'r', encoding="utf-8")
        #log_handle = open(filename, 'r', encoding="latin-1")
    except Exception as e:
        slog.warn("open file exception: {0}".format(e))
        return offset

    wait_num = 0
    #log_handle.seek(0, 2)  # go to end
    log_handle.seek(offset, 0)  # go to offset from head
    cur_pos = log_handle.tell()
    while True:
        cur_pos = log_handle.tell()
        try:
            line = log_handle.readline()
        except Exception as e:
            slog.warn("readline exception:{0}, cur_pos:{1}".format(e, cur_pos))
            continue
        if not line:
            wait_num += 1
            log_handle.seek(cur_pos)  # go to cur_pos from head
            time.sleep(1)
            slog.info("sleep 1 s, cur_pos: {0}".format(cur_pos))
            print_queue()
            if wait_num > 4:
                slog.debug("file: {0} done watch, size: {1}".format(filename, cur_pos))
                break
        else:
            send_size, recv_size = grep_log(line)
            wait_num = 0

    # judge whether a new file "$filename" was created
    if not os.path.exists(filename):
        return cur_pos
    try:
        new_log_handle = open(filename, 'r', encoding="latin-1")
    except Exception as e:
        return cur_pos
    new_log_handle.seek(0, 2)  # go to end
    new_size = new_log_handle.tell()
    if new_size > cur_pos:
        return cur_pos
    if new_size == cur_pos:
        slog.info('logfile:{0} maybe stopped'.format(filename))
        check_progress(filename)
        return cur_pos

    # new file "$filename" created
    slog.info("new file: {0} created".format(filename))
    return 0
def consume_alarm_with_notry(self):
    while True:
        slog.info("begin consume_alarm alarm_queue.size is {0}".format(
            self.alarm_queue_.qsize(self.queue_key_list_)))
        alarm_payload_list = self.alarm_queue_.get_queue_exp(
            self.queue_key_list_, self.consume_step_)  # return dict or None
        for alarm_payload in alarm_payload_list:
            alarm_type = alarm_payload.get('alarm_type')
            if alarm_type == 'demo':
                self.demo_alarm(alarm_payload.get('alarm_content'))
            else:
                slog.warn('invalid alarm_type:{0}'.format(alarm_type))
    return
def run(args):
    global gconfig, alarm_proxy_host, mypublic_ip_port
    if args.alarm.find(':') == -1:
        slog.error('alarm proxy host invalid')
        return 1
    alarm_proxy_host = args.alarm
    alarm_filename = args.file
    start_print = 'agent start... host:{0} file:{1}\n'.format(alarm_proxy_host, alarm_filename)
    slog.info(start_print)
    print(start_print)

    if update_config_from_remote():
        slog.warn('using remote config to start: {0}'.format(json.dumps(gconfig)))
    else:
        slog.error('using local config to start: {0}'.format(json.dumps(gconfig)))

    #run_watch_stream(alarm_filename)
    update_config_th = threading.Thread(target=update_config)
    update_config_th.daemon = True
    update_config_th.start()
    slog.info('start update config from remote thread')

    watchlog_th = threading.Thread(target=run_watch_stream, args=(alarm_filename, ))
    watchlog_th.daemon = True
    watchlog_th.start()
    slog.info("start watchlog thread")

    sys_cron_th = threading.Thread(target=system_cron_job)
    sys_cron_th.daemon = True
    sys_cron_th.start()
    slog.info("start system_cron_job thread")

    con_send_th = threading.Thread(target=consumer_alarm)
    con_send_th.daemon = True
    con_send_th.start()
    slog.info("start consumer_alarm thread")

    con_recv_th = threading.Thread(target=consumer_alarm_high)
    con_recv_th.daemon = True
    con_recv_th.start()
    slog.info("start consumer_alarm_high thread")

    return 0
def put_queue(self, item):
    if not isinstance(item, dict):
        return
    # TODO(smaug) for packet using uniq_chain_hash; other type using other hash
    qkey = self.get_queue_key_of_alarm(item)
    # item is dict, serialize to str
    # TODO(smaug)
    size = self.qsize([qkey])
    if size >= 500000:
        slog.warn("queue_key:{0} size {1} beyond 500000".format(qkey, size))
        return
    self.myredis.lpush(qkey, json.dumps(item))
    slog.debug("put_queue alarm:{0} in queue {1}, now size is {2}".format(
        json.dumps(item), qkey, self.qsize([qkey])))
    return
def remove_dead_node(self, node_ip):
    # iterate over a deep copy so entries can be removed from the live dict safely
    network_ids_bak = copy.deepcopy(self.network_ids_)
    for k, v in network_ids_bak.items():
        for ni in v.get('node_info'):
            if ni.get('node_ip') == node_ip:
                self.network_ids_[k]['node_info'].remove(ni)
                self.network_ids_[k]['size'] -= 1
                slog.warn('remove dead node_id:{0} node_ip:{1}'.format(
                    ni.get('node_id'), ni.get('node_ip')))
                if len(self.network_ids_[k]['node_info']) == 0:
                    del self.network_ids_[k]
                    break
    for k, v in self.network_ids_.items():
        slog.info('network_ids key:{0} size:{1}'.format(k, v.get('size')))
    return
def load_db_networksize(self):
    # TODO(smaug) not used for now
    return True

    vs, total = [], 0
    vs, total = self.network_info_sql_.query_from_db(data)
    if not vs:
        slog.warn('load network_info from db failed')
        return False
    for item in vs:
        self.network_ids_[item.get('network_id')] = json.loads(item.get('network_info'))
        slog.info('load network_info from db ok, network_id:{0} size:{1}'.format(
            item.get('network_id'),
            self.network_ids_.get(item.get('network_id')).get('size')))
    return True
def system_alarm_info_query():
    tnow = int(time.time() * 1000)
    public_ip_port = request.args.get('public_ip_port') or None
    root = request.args.get('root') or None
    priority = request.args.get('priority') or None
    begin = request.args.get('begin') or (tnow - 1 * 60 * 60 * 1000)  # latest 1 hour
    end = request.args.get('end') or tnow
    limit = request.args.get('limit') or 200
    page = request.args.get('page') or 1
    status_ret = {
        0: 'OK',
        -1: 'no data',
        -2: 'invalid parameter',
    }

    priority_list = []
    if priority:
        tmp_priority_list = priority.split(',')
        for p in tmp_priority_list:
            try:
                p = int(p)
                priority_list.append(p)
            except Exception as e:
                slog.warn('catch exception:{0}'.format(e))

    data = {
        'public_ip_port': public_ip_port,
        'root': root,
        'priority': priority_list,
        'begin': begin,
        'end': end
    }
    results, total = mydash.get_system_alarm_info(data, page=page, limit=limit)
    if results:
        ret = {
            'status': 0,
            'error': status_ret.get(0),
            'results': results,
            'total': total
        }
        return jsonify(ret)
    else:
        ret = {'status': -1, 'error': status_ret.get(-1), 'results': results}
        return jsonify(ret)
def dump_db_node_info(self, content):
    # only remove node or add new node
    if not content:
        return False
    node_ip = content.get('node_ip')  # ip:port
    node_id = content.get('node_id')
    send_timestamp = content.get('send_timestamp') or int(time.time() * 1000)
    value = copy.deepcopy(self.node_info_.get(node_ip))
    if not value:
        slog.warn('invalid node_id:{0} node_ip:{1}'.format(node_id, node_ip))
        return
    print(value)
    for k in copy.deepcopy(list(value.keys())):
        if not value.get(k):
            value.pop(k)
            continue  # key was just dropped, do not re-add it below
        if k in ['rec', 'zec', 'edg', 'arc', 'adv', 'val']:
            value[k] = json.dumps(value.get(k))
    self.node_info_sql_.update_insert_to_db(value)
    slog.info("dump node_info to db:{0}".format(json.dumps(value)))

    # update system_alarm_info
    alarm_info = ''
    root_id = ''
    priority = PRIORITY_DICT.get('low')
    node_id_status = content.get('node_id_status')
    if node_id_status == 'remove':
        alarm_info = 'remove node_id:{0}'.format(node_id)
    elif node_id_status == 'dead':
        root_id = node_id  # ffffff
        alarm_info = 'xtopchain down'
        priority = PRIORITY_DICT.get('high')
    else:
        alarm_info = 'add node_id:{0}'.format(node_id)
    if not root_id and self.node_info_.get(node_ip):
        root_id = self.node_info_.get(node_ip).get('root') or ''
    self.dump_db_system_alarm_info(node_ip, root_id, priority, alarm_info, send_timestamp)
    return
def dump_db_network_id_num(self, network_id):
    if network_id.startswith('ffffff'):
        return
    if network_id not in self.network_id_num_:
        self.load_db_network_id_num()
    if network_id in self.network_id_num_:
        # already in db
        return

    net_type = ''
    if network_id.startswith('ff0000010000'):
        net_type = 'rec'
    elif network_id.startswith('ff0000020000'):
        net_type = 'zec'
    elif network_id.startswith('ff00000f0101'):
        net_type = 'edg'
    elif network_id.startswith('ff00000e0101'):
        net_type = 'arc'
    elif network_id.startswith('ff00000001'):
        tmp_group_id = int(network_id[-2:], 16)
        if 0 <= tmp_group_id <= 63:
            # adv group_id: [0, 64)
            net_type = 'adv'
        elif 64 <= tmp_group_id <= 126:
            # val group_id: [64, 127)
            net_type = 'val'
        else:
            slog.warn('not support network_id:{0} for map-num'.format(network_id))
            return
    else:
        slog.warn('not support network_id:{0} for map-num'.format(network_id))
        return

    data = {'network_id': network_id, 'network_type': net_type}
    self.network_id_num_sql_.insert_to_db(data)
    slog.info('dump network_id_num to db:{0}'.format(json.dumps(data)))
    self.load_db_network_id_num()
    return
def get_network_num_of_ip(self, public_ip_port):
    result = []
    if not self.node_info_.get(public_ip_port):
        slog.warn('get node_info of ip:{0} failed'.format(public_ip_port))
        return result
    for net_choice in ['rec', 'zec', 'edg', 'arc', 'adv', 'val']:
        net_id_list = self.node_info_.get(public_ip_port).get(net_choice)
        if not net_id_list:
            continue
        for node_id in net_id_list:
            network_id = node_id[:12]
            if not self.network_id_num_.get(network_id):
                continue
            network_num = self.network_id_num_.get(network_id).get('network_num')
            result.append(network_num)
    slog.debug('get_network_num:{0} of ip:{1}'.format(json.dumps(result), public_ip_port))
    return result
def consume_alarm(self):
    while True:
        slog.info("begin consume_alarm alarm_queue.size is {0}".format(
            self.alarm_queue_.qsize(self.queue_key_list_)))
        try:
            alarm_payload_list = self.alarm_queue_.get_queue_exp(
                self.queue_key_list_, self.consume_step_)  # return dict or None
            for alarm_payload in alarm_payload_list:
                alarm_type = alarm_payload.get('alarm_type')
                if alarm_type == 'networksize':
                    self.networksize_alarm(alarm_payload.get('alarm_content'))
                elif alarm_type == 'system':
                    self.system_cron_alarm(alarm_payload.get('alarm_content'))
                else:
                    slog.warn('invalid alarm_type:{0}'.format(alarm_type))
        except Exception as e:
            slog.warn('catch exception:{0}'.format(e))
    return
def do_alarm(alarm_list):
    global alarm_proxy_host
    url = 'http://' + alarm_proxy_host
    url = urljoin(url, '/api/alarm/')
    my_headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
        'Content-Type': 'application/json;charset=UTF-8',
    }
    #my_data = json.dumps(alarm_list)
    my_data = {
        'token': 'testtoken',
        'data': []
    }
    my_data['data'] = alarm_list
    my_data = json.dumps(my_data)
    slog.info("do_alarm: {0}".format(my_data))
    try:
        #res = requests.post(url, headers=my_headers, data=my_data, timeout=5)
        res = mysession.post(url, headers=my_headers, data=my_data, timeout=5)
        if res.status_code == 200:
            if res.json().get('status') == 0:
                slog.info("send alarm ok, response: {0}".format(res.text))
                return True
            else:
                slog.warn("send alarm fail, response: {0}".format(res.text))
        else:
            slog.warn('send alarm fail: {0}'.format(res.text))
    except Exception as e:
        slog.warn("exception: {0}".format(e))
    return False
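# Note: mysession is used by do_alarm() above but not defined in this section.
# A minimal sketch, assuming it is a shared requests.Session so that keep-alive
# connections are reused across alarm posts; the real setup may also configure
# retries or proxies.
import requests

mysession = requests.Session()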
def system_cron_alarm(self, content):
    now = (time.time() * 1000)
    send_timestamp = content.get('send_timestamp')
    if abs(now - send_timestamp) > 10 * 60 * 1000:
        slog.warn('system_cron_alarm expired, diff:{0} ms'.format(abs(now - send_timestamp)))
        return
    if content.get('send_timestamp') % (60 * 1000) % 1000 != 0:
        slog.warn('system_cron_alarm send_timestamp:{0} invalid'.format(
            content.get('send_timestamp')))
        return

    network_num_result = self.get_network_num_of_ip(content.get('public_ip_port'))
    for num in network_num_result:
        if num < 1 or num > 10:
            slog.warn('network_num:{0} invalid'.format(num))
            continue
        db_net_field = 'net{0}'.format(num)
        content[db_net_field] = 1

    self.system_cron_info_sql_.insert_to_db(content)
    slog.debug('insert system_cron_info to db:{0}'.format(json.dumps(content)))
    return
def check_progress(filename):
    global gconfig, mark_down_flag, mypublic_ip_port, my_root_id
    if mark_down_flag:
        return False
    cmd = 'ps -ef |grep topio |grep xnode |grep -v grep'
    #cmd = 'lsof {0} |grep xtopchain'.format(filename)
    result = os.popen(cmd).readlines()
    if result:
        return False

    slog.warn('topio down!! topio down!! topio down!! filename:{0}'.format(filename))
    mark_down_flag = True
    alarm_payload = {
        'alarm_type': gconfig.get('grep_networksize').get('alarm_type'),
        'alarm_content': {
            'node_ip': mypublic_ip_port,
            'node_id': my_root_id,
            'node_id_status': 'dead',
            'send_timestamp': int(time.time() * 1000),
            'info': 'topio xnode down!'
        },
    }
    put_alarmq_high(alarm_payload)
    return True
def consumer_alarm_high():
    global ALARMQ, ALARMQ_HIGH, gconfig
    th_name = threading.current_thread().name
    alarm_pack_num = gconfig.get('alarm_pack_num')
    alarm_pack_num = 1  # override: send high-priority alarms one at a time
    alarm_list = []
    while True:
        try:
            slog.info("consumer thread:{0} recv_queue:{1} size:{2}".format(
                th_name, ALARMQ_HIGH, ALARMQ_HIGH.qsize()))
            while not ALARMQ_HIGH.empty():
                alarm_payload = ALARMQ_HIGH.get()
                alarm_list.append(alarm_payload)
                if len(alarm_list) >= alarm_pack_num:
                    slog.info("alarm_high do_alarm")
                    if not do_alarm(alarm_list):
                        slog.warn("alarm_high send failed, put in queue again")
                        for item in alarm_list:
                            put_alarmq_high(item)
                    alarm_list.clear()
            time.sleep(1)
        except Exception as e:
            pass
def load_db_network_id_num(self):
    vs, total = [], 0
    vs, total = self.network_id_num_sql_.query_from_db(data={})
    if not vs:
        slog.warn('load network_id_num from db failed or empty')
        return False
    for item in vs:
        # just for safety check
        network_id = item.get('network_id')
        network_num = item.get('network_num')
        if network_id in self.network_id_num_:
            if self.network_id_num_.get(network_id).get('network_num') != network_num:
                slog.warn(
                    'load network_id_num from db goes wrong, db_network_num:{0} not eq cache_network_num:{1}'
                    .format(network_num,
                            self.network_id_num_.get(network_id).get('network_num')))
                sys.exit(-1)
        self.network_id_num_[item.get('network_id')] = item
    slog.info('load network_id_num from db success:{0}'.format(
        json.dumps(self.network_id_num_)))
    return True
def get_system_cron_info(self, data, page=1, limit=200000):
    '''
    data = {
        'public_ip_port': public_ip_port,
        'network_id': network_id,
        'begin': begin,
        'end': end
    }
    '''
    tbegin = int(time.time() * 1000)
    results = {}  # key is db_field: cpu/mem/band; value is list of [time, value] pairs
    tmp_result = {}  # key is timestamp
    cols = 'public_ip_port,send_timestamp'
    cols_list = []
    if data.get('mem') == 'true':
        cols += ',mem'
        cols_list.append('mem')
    if data.get('send_bandwidth') == 'true':
        cols += ',send_bandwidth'
        cols_list.append('send_bandwidth')
    if data.get('recv_bandwidth') == 'true':
        cols += ',recv_bandwidth'
        cols_list.append('recv_bandwidth')
    if data.get('send_packet') == 'true':
        cols += ',send_packet'
        cols_list.append('send_packet')
    if data.get('recv_packet') == 'true':
        cols += ',recv_packet'
        cols_list.append('recv_packet')
    if cols.endswith('send_timestamp') or data.get('cpu') == 'true':
        cols += ',cpu'
        cols_list.append('cpu')

    tmp_value = {}
    for k in cols_list:  # {mem: xx, cpu: xx, send_bandwidth: xx, ...}
        tmp_value[k] = 0
        results[k] = []
    tmp_value['count'] = 0

    network_num = None
    if data.get('network_id'):
        network_id = data.get('network_id')[:12]
        if network_id not in self.network_id_num_:
            vs = self.load_db_network_id_num(data={})
            for item in vs:
                self.network_id_num_[item.get('network_id')] = item
            slog.info('load network_id_num from db size:{0}'.format(len(vs)))
        if network_id not in self.network_id_num_:
            slog.warn('can not find network_num of network_id:{0}'.format(network_id))
            return results
        network_num = self.network_id_num_.get(network_id).get('network_num')
        slog.debug('get network_num:{0} of network_id:{1}'.format(network_num, network_id))
    if network_num != None:
        net_field = 'net{0}'.format(network_num)
        data[net_field] = 1

    vs, total = [], 0
    vs, total = self.system_cron_info_sql_.query_from_db(data, cols=cols, page=page, limit=limit)
    if not vs:
        slog.debug('system_cron_info_sql query_from_db failed, data:{0}'.format(json.dumps(data)))
        return results
    print('query from db size:{0}'.format(len(vs)))

    for item in vs:
        send_timestamp = item.get('send_timestamp')
        if send_timestamp not in tmp_result:
            tmp_result[send_timestamp] = copy.deepcopy(tmp_value)
        for k in cols_list:
            tmp_result[send_timestamp][k] += item.get(k)
        tmp_result[send_timestamp]['count'] += 1

    for timest, tvalue in tmp_result.items():
        for name, sumv in tvalue.items():
            if name == 'count':
                continue
            point = [timest, sumv / tvalue['count']]
            results[name].append(point)

    slog.debug('system_cron result:{0}'.format(json.dumps(results)))
    tend = int(time.time() * 1000)
    slog.debug('get_system_cron_info taking:{0} ms'.format(tend - tbegin))
    return results
def load_db_network_id_num(self, data, limit=100, page=1):
    vs, total = [], 0
    vs, total = self.network_id_num_sql_.query_from_db(data=data, limit=limit, page=page)
    if not vs:
        slog.warn('load network_id_num from db failed or empty')
    return vs
def get_packet_drop(self, data):
    begin = data.get('begin')  # ms
    end = data.get('end')      # ms
    tmp_time = begin
    time_list = []
    time_drop_map = {}
    while tmp_time <= end:
        tmp_time = tmp_time + 60 * 1000  # 1 min
        time_list.append(tmp_time)
        time_drop_map[tmp_time] = []
    # one extra bucket beyond the end of the range
    time_list.append(tmp_time)
    time_drop_map[tmp_time] = []
    slog.debug('time_list size {0}'.format(len(time_list)))

    results = []
    # get packet info from db
    vs, total = [], 0
    limit, page = None, None
    cols = 'uniq_chain_hash,send_timestamp,dest_networksize,recv_nodes_num'
    vs, total = self.packet_info_sql.query_from_db(data, cols=cols, limit=limit, page=page)
    if not vs:
        slog.debug('packet_info_sql query_from_db failed, data:{0}'.format(json.dumps(data)))
        return results

    for item in vs:
        #uniq_chain_hash = item.get('uniq_chain_hash')
        dest_networksize = item.get('dest_networksize')
        recv_nodes_num = item.get('recv_nodes_num')
        if int(dest_networksize) <= 0:
            slog.warn("dest_networksize smaller than 0")
            continue
        send_timestamp = item.get('send_timestamp')
        time_index = int((int(send_timestamp) - int(begin)) / (60 * 1000))
        drop_rate = 100 - (float(recv_nodes_num) / float(dest_networksize) * 100)
        drop_rate = "%.1f" % drop_rate
        drop_rate = float(drop_rate)
        if recv_nodes_num >= dest_networksize:
            drop_rate = 0.0
        if time_index > (len(time_list) - 1):
            slog.warn('time_index:{0} beyond time_list length:{1}'.format(time_index, len(time_list)))
            continue
        time_drop_map[time_list[time_index]].append(drop_rate)

    for k, v in time_drop_map.items():
        if not v:
            continue
        sum_drop_rate = 0.0
        for item in v:
            #slog.debug('drop_rate: {0}'.format(item))
            sum_drop_rate += item
        slog.debug('sum_drop_rate:{0} size:{1}'.format(sum_drop_rate, len(v)))
        avg_drop_rate = sum_drop_rate / len(v)
        avg_drop_rate = "%.1f" % avg_drop_rate
        avg_drop_rate = float(avg_drop_rate)
        results.append([k, avg_drop_rate])
        '''
        tmp_drop_db_item = {
            "network_id": dest_node_id,
            "timestamp": k,
            "drop_rate": avg_drop_rate
        }
        self.packet_drop_rate_sql.insert_to_db(tmp_drop_db_item)
        '''

    results.sort(key=get_list_first)
    '''
    #print(results)
    if results:
        results[0][1] = 1.1
        results[-1][1] = 1.1
    '''
    return results
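# Note: get_list_first() is used as the sort key above but is not defined in
# this section. A minimal sketch of the assumed helper: order the
# [timestamp, drop_rate] points by their first element (the timestamp).
def get_list_first(point):
    return point[0]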
def get_network_ids(self, data):
    result = {
        'node_info': [],
        'node_size': 0,
    }
    now = int(time.time() * 1000)
    node_size = 0
    with self.network_ids_lock_:
        # not exist or expired beyond 1 min, then reread from shm
        if not self.network_ids_ or self.network_ids_.get('update_timestamp').get('update_timestamp') < (now - 1 * 60 * 1000):
            if not self.update_network_ids():
                return result

        if data.get('network_id'):
            # get network_id
            for k, v in self.network_ids_.items():
                if not v or not v.get('node_info'):
                    continue
                if k.startswith(data.get('network_id')) or k == data.get('network_id'):
                    if data.get('onlysize') != True:
                        result['node_info'].extend(v.get('node_info'))
                    node_size += len(v.get('node_info'))
            result['node_size'] = node_size
            slog.info('get_network_ids success.')
            return result
        elif data.get('node_id') or data.get('node_ip'):
            node_ip = data.get('node_ip')
            if not node_ip:
                # get info of node_id
                node_id = data.get('node_id')
                if not node_id:
                    return result
                for k, v in self.network_ids_.items():
                    if node_ip:
                        break
                    if not v or not v.get('node_info'):
                        continue
                    for item in v.get('node_info'):
                        if item.get('node_id').startswith(node_id):
                            node_ip = item.get('node_ip')
                            slog.warn('get_node_ip ok of node_id:{0} node_ip:{1}'.format(
                                data.get('node_id'), node_ip))
                            break
            if not node_ip:
                slog.warn('get_node_ip failed of node_id:{0} node_ip:{1}'.format(
                    data.get('node_id'), data.get('node_ip')))
                return result
            node_size = 0
            for k, v in self.network_ids_.items():
                if not v or not v.get('node_info'):
                    continue
                for item in v.get('node_info'):
                    if item.get('node_ip').split(':')[0] == node_ip.split(':')[0]:
                        if data.get('onlysize') != True:
                            result['node_info'].append(item)
                        node_size += 1
            result['node_size'] = node_size
            slog.info('get_network_ids of node_id:{0} node_ip:{1} success. result:{2}'.format(
                data.get('node_id'), data.get('node_ip'), json.dumps(result)))
            return result
        else:
            # get all network_ids
            node_size = 0
            for k, v in self.network_ids_.items():
                if not v or not v.get('node_info'):
                    continue
                vinfo = v.get('node_info')
                if not vinfo:
                    continue
                if data.get('onlysize') != True:
                    result['node_info'].extend(vinfo)
                node_size += len(vinfo)
            result['node_size'] = node_size
            slog.info('get_network_ids success. result:{0}'.format(result))
            return result
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.description = 'TOP-Argus consumer: start consumers as separate processes, each bound to one type of alarm content to consume'
    parser.add_argument('-t', '--type', help='bind with alarm_type, eg: packet, networksize...', default='')
    parser.add_argument('-e', '--env', help='env, eg: test, docker', default='test')
    args = parser.parse_args()
    if not args.type:
        slog.warn("please give one type or 'all'")
        sys.exit(-1)
    alarm_type = args.type
    alarm_env = args.env
    print('type:{0} env:{1}'.format(alarm_type, alarm_env))
    slogging.start_log_monitor()
    run(alarm_type, alarm_env)
def packet_query():
    uniq_chain_hash = request.args.get('uniq_chain_hash') or None
    chain_hash = request.args.get('chain_hash') or None
    chain_msgid = request.args.get('chain_msgid') or None
    is_root = request.args.get('is_root') or None
    broadcast = request.args.get('broadcast') or None
    send_node_id = request.args.get('send_node_id') or None
    src_node_id = request.args.get('src_node_id') or None
    dest_node_id = request.args.get('dest_node_id') or None
    limit = request.args.get('limit') or 200
    page = request.args.get('page') or 1
    status_ret = {
        0: 'OK',
        -1: 'no data',
        -2: 'invalid parameter',
    }
    try:
        if uniq_chain_hash:
            uniq_chain_hash = int(uniq_chain_hash)
        if chain_hash:
            chain_hash = int(chain_hash)
        if chain_msgid:
            chain_msgid = int(chain_msgid)
        if limit != None:
            limit = int(limit)
        if page != None:
            page = int(page)
    except Exception as e:
        slog.warn("catch exception:{0}".format(e))
        ret = {'status': -2, 'error': status_ret.get(-2), 'results': []}
        return jsonify(ret)

    data = {
        'uniq_chain_hash': uniq_chain_hash,
        'chain_hash': chain_hash,
        'chain_msgid': chain_msgid,
        'is_root': is_root,
        'broadcast': broadcast,
        'send_node_id': send_node_id,
        'src_node_id': src_node_id,
        'dest_node_id': dest_node_id,
    }
    results, total = mydash.get_packet_info(data, limit, page)
    if results:
        ret = {
            'status': 0,
            'error': status_ret.get(0),
            'results': results,
            'total': total
        }
        return jsonify(ret)
    else:
        ret = {
            'status': -1,
            'error': status_ret.get(-1),
            'results': results,
            'total': total
        }
        return jsonify(ret)