def query_cluster_status(request): #统计每个集群的可用性 try: cluster = request.POST['cluster'] range = request.POST['range'] end_timestamp = int(time.time()) start_timestamp = end_timestamp - int(range) #先统计有哪些集群名 if cluster == 'all': cluster_list = [i['fcluster'] for i in Service.objects.values('fcluster').distinct() if i['fcluster']] else: cluster_list = [cluster] counters = ['cluster.available.percent/clusterName=%s,project=oms'%i for i in cluster_list] endpoint = get_local_ip() f = Falcon() history_data = f.get_history_data(start_timestamp, end_timestamp, [endpoint], counters, step=24*60*60, CF='AVERAGE') data = [] for i in history_data: if i['counter']: tags = i['counter'].split('/')[1] tag_dict = gen_tags(tags) cluster = tag_dict.get('clusterName') ts_value = [] c_date = time.strftime("%Y-%m-%d") c_timestamp = int(time.mktime(time.strptime(c_date, '%Y-%m-%d'))) * 1000 for j in i['Values']: if (j['timestamp'] - 8*60*60)*1000 == c_timestamp: ts_value.append([c_timestamp, get_cluster_available(cluster, c_date)]) else: ts_value.append([(j['timestamp'] - 8*60*60)*1000, j['value']]) #这里要减去8个小时,是因为rrd里存的点的时刻是8点钟 data.append({'data':ts_value, 'name':cluster}) except: print traceback.format_exc() return JsonResponse({'code': 0, 'data': data, 'message': 'ok'})
def query_graph(self, request):
    """Return history chart data for every metric of one service component,
    plus the rendered HTML for the chart containers.

    POST params:
        fid:   Service primary key.
        range: look-back window in seconds.
    """
    fid = request.POST['fid']
    time_range = request.POST['range']  # renamed: avoid shadowing the builtin `range`
    end_ts = int(time.time())
    start_ts = end_ts - int(time_range)
    service_obj = Service.objects.get(fid=fid)
    endpoint = service_obj.fhost
    # Collect which top-level metrics this component type exposes.
    all_data = []
    metric_list = []
    falcon_client = Falcon()
    for metric, item_list in items[service_obj.fname].items():
        metric_list.append(metric)
        counter_list = ['%s/%sport=%s' % (i, service_obj.fname, service_obj.fport) for i in item_list]
        history_data = falcon_client.get_history_data(start_ts, end_ts, [endpoint], counter_list, CF='AVERAGE')
        hdata = [{'name': i['counter'],
                  'data': [[j['timestamp'] * 1000, j['value']] for j in i['Values']]}
                 for i in history_data]
        all_data.append({'metric': metric, 'hdata': hdata})
    # Render the chart-container HTML alongside the data.
    # `with` guarantees the template file is closed even if reading raises
    # (the original leaked the handle on any exception before f.close()).
    with open('templates/service/chart_div_tmp.html') as tmpl_file:
        t = template.Template(tmpl_file.read())
    con = template.Context({'data': serialize_number(len(all_data), 2), 'metric_list': metric_list})
    res = t.render(con)
    return JsonResponse({'code': 0, 'data': {'all_data': all_data, 'html_content': res}, 'message': 'ok'})
def query_port(self, request):
    """Return the listen-port check history for one host:port in a time window.

    POST params:
        host, port:            target endpoint.
        start_time, end_time:  '%Y-%m-%d %H:%M:%S' local-time strings.
    """
    host = request.POST['host']
    port = request.POST['port']
    start_ts = int(time.mktime(time.strptime(request.POST['start_time'], '%Y-%m-%d %H:%M:%S')))
    end_ts = int(time.mktime(time.strptime(request.POST['end_time'], '%Y-%m-%d %H:%M:%S')))
    counter = '%s/port=%s,project=oms' % (settings.port_listen_key, port)
    falcon_client = Falcon()
    series = falcon_client.get_history_data(start_ts, end_ts, [host], [counter], CF='AVERAGE')
    hdata = [
        {'name': item['endpoint'],
         'data': [[point['timestamp'] * 1000, point['value']] for point in item['Values']]}
        for item in series
    ]
    return JsonResponse({'code': 0, 'data': {'hdata': hdata}, 'message': 'ok'})
def query_service_top(self, request): try: cluster = request.POST['cluster'] date = request.POST['date'] topn = int(request.POST['topn']) start_ts = int(time.mktime(time.strptime(date, '%Y-%m-%d'))) end_ts = start_ts + 86400 #先插集群下有哪些ip和端口 endpoints = [] ports = [] for i in Service.objects.filter(fcluster=cluster): if i.fhost: endpoints.append(i.fhost) if i.fport: ports.append(i.fport) endpoints = list(set(endpoints)) ports = list(set(ports)) counters = ['%s/port=%s,project=oms' % (settings.port_listen_key, i) for i in ports] f = Falcon() history_data = f.get_history_data(start_ts, end_ts, endpoints, counters, step=60, CF='AVERAGE') print history_data names = [] values = [] data = {} for i in history_data: if i['Values']: host = i['endpoint'] tags = i['counter'].split('/')[1] tag_dict = gen_tags(tags) port = tag_dict['port'] if not Service.objects.filter(fhost=host,fport=port,fcluster=cluster): continue # 计算故障率 fail_count = 0 for j in i['Values']: if j['value'] == 0: fail_count += 1 fail_rate = '%.2f' % (float(fail_count) / len(i['Values']) * 100) fail_rate = float(fail_rate) data['%s:%s' % (host, port)] = fail_rate # 排序 if data: print data data = sorted(data.items(), key=lambda x: x[1], reverse=True)[:topn] for k, v in data: names.append(k) values.append(v) except: print traceback.format_exc() return JsonResponse({'code': 0, 'data': {'names': names, 'values': values}, 'message': 'ok'})
def query_graph2(self, request):
    """Return the history series of one arbitrary counter for a service's host.

    POST params:
        fid:     Service primary key (its fhost is used as the endpoint).
        counter: full Falcon counter string to query.
        range:   look-back window in seconds.
    """
    fid = request.POST['fid']
    counter = request.POST['counter']
    time_range = request.POST['range']  # renamed: avoid shadowing the builtin `range`
    end_ts = int(time.time())
    start_ts = end_ts - int(time_range)
    service_obj = Service.objects.get(fid=fid)
    endpoint = service_obj.fhost
    f = Falcon()
    history_data = f.get_history_data(start_ts, end_ts, [endpoint], [counter], CF='AVERAGE')
    hdata = []
    for i in history_data:
        # Timestamps are converted to milliseconds for the front-end chart library.
        hdata.append({'name': i['endpoint'],
                      'data': [[j['timestamp'] * 1000, j['value']] for j in i['Values']]})
    return JsonResponse({'code': 0, 'data': {'hdata': hdata}, 'message': 'ok'})
def get_cluster_available(cluster, c_date):
    """Weighted availability percentage of one cluster for a single day.

    Each (host, port) minute-sample is weighted by the service's fweight;
    a sample value of 0 means the port check failed.

    :param cluster: cluster name (matches Service.fcluster).
    :param c_date:  day to evaluate, '%Y-%m-%d'.
    :return: float percentage in [0, 100].
    """
    start_ts = int(time.mktime(time.strptime(c_date, '%Y-%m-%d')))
    end_ts = start_ts + 86400  # one day
    host_list = []
    port_list = []
    weight_info = {}
    for svc in Service.objects.filter(fcluster=cluster):
        if svc.fhost:
            host_list.append(svc.fhost)
        if svc.fport:
            port_list.append(str(svc.fport))
        if svc.fhost and svc.fport:
            weight_info['%s_%s' % (svc.fhost, svc.fport)] = svc.fweight
    host_list = list(set(host_list))
    port_list = list(set(port_list))
    counter_list = ['listen.port/port=%s,project=oms' % p for p in port_list]
    f = Falcon()
    data = f.get_history_data(start_ts, end_ts, host_list, counter_list, step=60)
    success_count = 0
    total = 0
    for i in data:
        values = i['Values']
        if not values:
            continue
        tag_dict = gen_tags(i['counter'].split('/')[1])
        port = tag_dict['port']
        # (host, port) pairs from Falcon's host x counter product that are not
        # registered fall back to weight 1.
        weight = weight_info.get('%s_%s' % (i['endpoint'], port), 1)
        success_list = [j['value'] for j in values if j['value'] != 0]
        success_count += weight * len(success_list)
        total += weight * len(values)
    if total == 0:
        # No samples at all: report fully available instead of dividing by zero.
        # BUG FIX: the original guarded on `success_count == 0`, which reported
        # 100% availability for a cluster whose every single check had failed.
        return 100
    return float('%.2f' % (float(success_count) / total * 100))
def query_service_top(request):
    """Top-N services across all clusters ranked by port-check failure rate.

    POST params:
        range: look-back window in seconds.
        topn:  how many worst services to return.
    """
    time_range = request.POST['range']  # renamed: avoid shadowing the builtin `range`
    topn = int(request.POST['topn'])
    end_timestamp = int(time.time())
    start_timestamp = end_timestamp - int(time_range)
    # Collect every known host and port.
    endpoints = []
    ports = []
    for svc in Service.objects.all():
        if svc.fhost:
            endpoints.append(svc.fhost)
        if svc.fport:
            ports.append(svc.fport)
    endpoints = list(set(endpoints))
    ports = list(set(ports))
    counters = ['%s/port=%s,project=oms' % (settings.port_listen_key, p) for p in ports]
    f = Falcon()
    history_data = f.get_history_data(start_timestamp, end_timestamp, endpoints, counters,
                                      step=60, CF='AVERAGE')
    names = []
    values = []
    data = {}
    for i in history_data:
        if not i['Values']:
            continue
        host = i['endpoint']
        tag_dict = gen_tags(i['counter'].split('/')[1])
        port = tag_dict['port']
        # Failure rate = share of samples where the port check reported 0 (down).
        fail_count = sum(1 for j in i['Values'] if j['value'] == 0)
        fail_rate = float('%.2f' % (float(fail_count) / len(i['Values']) * 100))
        data['%s:%s' % (host, port)] = fail_rate
    # Sort by failure rate, descending, and keep the worst N.
    for key, rate in sorted(data.items(), key=lambda x: x[1], reverse=True)[:topn]:
        names.append(key)
        values.append(rate)
    return JsonResponse({'code': 0, 'data': {'names': names, 'values': values}, 'message': 'ok'})