def get_agent_status(request, cc_biz_id):
    """Summarise the agent installation status of every host in a business.

    :param request: the incoming HTTP request (not read by this view)
    :param cc_biz_id: id of the business whose hosts are inspected
    :return: ``render_json`` response. On success ``data`` is the number of
        hosts whose agent status equals ``AGENT_STATUS.ON`` and ``details``
        carries both counters plus the ip / cc_plat_id of every host in each
        group. When the business has hosts but the status query comes back
        empty, ``result`` is False and ``data`` is an empty list.
    """
    hosts = CCBiz.hosts(cc_biz_id).get('data') or []
    status_by_host = CCBiz.agent_status(
        cc_biz_id, [host_key(item) for item in hosts])

    # Hosts exist but no status came back: treat it as a failed status query.
    if hosts and len(status_by_host) == 0:
        return render_json({
            'result': False,
            'message': u"查询agent状态接口失败",
            'data': []
        })

    ok_ip_list = []
    fail_ip_list = []
    for hostid, status in status_by_host.iteritems():
        ip, plat_id = parse_host_id(hostid)
        # Every status other than ON counts as a failure.
        bucket = ok_ip_list if status == AGENT_STATUS.ON else fail_ip_list
        bucket.append({'ip': ip, 'cc_plat_id': plat_id})

    return render_json({
        'result': True,
        'message': "",
        'data': len(ok_ip_list),
        'details': {
            'agent_ok_cnt': len(ok_ip_list),
            'agent_fail_cnt': len(fail_ip_list),
            'ok_ip_list': ok_ip_list,
            'fail_ip_list': fail_ip_list,
        },
    })
def _collapse_io_util(io_util):
    """Reduce a list-valued ``io_util["val"]`` to its single highest value.

    When ``io_util["val"]`` is a list of dicts, the dicts are merged (a later
    dict overrides an earlier one on the same key) and ``"val"`` is replaced,
    in place, by the largest merged value. Any other input is left untouched.

    :param io_util: the IO entry looked up for one host, or None
    :return: the same object that was passed in
    """
    if io_util and isinstance(io_util.get("val"), list):
        merged = dict()
        for sample in io_util["val"]:
            for name, value in sample.iteritems():
                merged[name] = value
        # max() raises ValueError on an empty sequence; a host with an empty
        # sample list must not turn the whole response into a system error.
        io_util["val"] = max(merged.values()) if merged else None
    return io_util


def _build_perf_host_list(hosts, default_status_info, hosts_agent_status,
                          perf_info=None):
    """Wrap raw host records into ``Host`` objects for the performance list.

    :param hosts: raw host records (the ``data`` list of ``CCBiz.hosts``)
    :param default_status_info: attribute name -> default value, set on
        every host
    :param hosts_agent_status: mapping of host id -> agent status
    :param perf_info: dict with the keys ``cpu_usage``, ``cpu_single_usage``,
        ``cpu_load`` and ``io_util``, each a mapping of host id -> metric.
        ``None`` means the metrics could not be queried, so every metric
        attribute is set to None.
    :return: list of ``Host`` objects sorted by ``status``, descending
    """
    host_list = []
    for raw_host in hosts:
        host = Host(raw_host)
        for attr, value in default_status_info.iteritems():
            setattr(host, attr, value)
        host.is_stickied = MonitorHostSticky.objects.host_is_stickied(host.id)

        # Basic performance metrics.
        if perf_info is None:
            host.cpu_usage = None
            host.cpu_single_usage = None
            host.cpu_load = None
            host.io_util = None
        else:
            host.cpu_usage = perf_info["cpu_usage"].get(host.id)
            host.cpu_single_usage = perf_info["cpu_single_usage"].get(host.id)
            host.cpu_load = perf_info["cpu_load"].get(host.id)
            host.io_util = _collapse_io_util(
                perf_info["io_util"].get(host.id))

        host.status = hosts_agent_status.get(host.id, AGENT_STATUS.UNKNOWN)
        metrics = [
            host.cpu_usage, host.cpu_single_usage,
            host.cpu_load, host.io_util,
        ]
        # The agent reports ON but not a single metric is available.
        if host.status == AGENT_STATUS.ON and not any(metrics):
            host.status = AGENT_STATUS.NO_DATA

        # Flags consumed by the front-end page:
        # whether the row is selected
        host.checked = False
        # whether the row is shown (when grouping by attribute)
        host._is_show = True
        host_list.append(host)

    host_list.sort(key=lambda item: item.status, reverse=True)
    return host_list


def index(request, cc_biz_id):
    """Basic performance page and its host performance data.

    request.method:
        GET: render the basic performance home page.
        any other method: return, as JSON, every host of the business with
            its attributes, agent status and performance metrics.

    :param request: the incoming HTTP request
    :param cc_biz_id: id of the business being viewed
    :return: the rendered home page for GET, otherwise a ``render_json``
        response built from ``ok_data`` / ``failed_data``
    """

    def get_access_status(cc_biz_id):
        """Describe whether basic performance collection is set up.

        :return: dict with the keys ``accessed`` (bool), ``div_message`` and
            ``btn_message`` (texts shown to the user)
        """
        btn_message = u"确定"
        div_message = u"您的性能指标采集任务已下发正在进行中,请稍候再试!"
        # Step 1: look up the basic performance data ids of this business.
        data_id_exist = any(get_bp_data_id(cc_biz_id))
        # Step 2: no data id at all means collection was never enabled.
        if not data_id_exist:
            div_message = (u"检测到您的业务尚未开启监控,"
                           u"点击下面按钮开启主机指标采集,\n"
                           u"10-20分钟后刷新此页面即可查看到主机数据,请耐心等待!\n"
                           u"(未安装agent的主机,"
                           u"在主机详情页中按照指引完成部署并开启数据采集。)")
            btn_message = u"开始采集"
        return {
            "accessed": data_id_exist,
            "div_message": div_message,
            "btn_message": btn_message
        }

    data = {
        "hosts": [],
        "update_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    if request.method == "GET":
        # NOTE: locals() is handed to the template below, so the names of the
        # local variables defined up to this point are part of the template
        # context. Do not rename them or add new ones above this return.
        count_per_page = 10
        # Cloud area information.
        plat_info = CCBiz.plat_info(cc_biz_id)
        checkhost_id = request.GET.get("alarm_strategy_id")
        return render_mako_context(
            request, '/monitor/performanceV2/home.html', locals())

    ts = TimeStats(u"get_host_info_list: %s" % cc_biz_id)
    # Fetch every host; pagination happens in the front end.
    ts.split(u"获取主机列表")
    hosts = CCBiz.hosts(cc_biz_id).get("data") or []
    host_ids = set(host_key(h) for h in hosts)
    ts.split(u"获取主机agent状态")
    hosts_agent_status = CCBiz.agent_status(cc_biz_id, host_ids)
    default_status_info = {
        'alarm': {
            'level': [0, 0, 0],
        },
        'component': [],
    }
    try:
        # Total CPU usage.
        ts.split(u"获取主机cpu 总使用率")
        cpu_usage_info = HostIndex.get_host_performance(
            cc_biz_id, item="cpuusage", category="cpu",
            result_table_id="ja_gse_cpu_cpuusage")
        # CPU 5-minute load.
        # NOTE(review): "locdavg" looks like a typo of "loadavg" - confirm
        # against the result table before touching it.
        ts.split(u"获取主机cpu 5分钟负载")
        cpu_load_info = HostIndex.get_host_performance(
            cc_biz_id, item="locdavg", category="cpu",
            result_table_id="ja_gse_cpu")
        # Single-core CPU usage.
        ts.split(u"获取主机cpu 单核使用率")
        cpu_single_usage_info = HostIndex.get_host_performance(
            cc_biz_id, item="cpuusage", category="cpu",
            result_table_id="ja_gse_cpu_core_cpuusage")
        # Disk usage (temporarily hidden):
        # disk_used_info = get_host_performance(cc_biz_id,
        #     item="used_percent", category="disk",
        #     result_table_id="ja_gse_disk_used")
        # IO wait.
        ts.split(u"获取主机IO等待")
        io_util_info = HostIndex.get_host_performance(
            cc_biz_id, item="util", category="disk",
            result_table_id="ja_gse_disk_iostats")

        ts.split(u"处理整合所有数据")
        data["hosts"] = _build_perf_host_list(
            hosts, default_status_info, hosts_agent_status,
            perf_info={
                "cpu_usage": cpu_usage_info,
                "cpu_single_usage": cpu_single_usage_info,
                "cpu_load": cpu_load_info,
                "io_util": io_util_info,
            })
        return render_json(ok_data(data))
    except (TableNotExistException, SqlQueryException) as e:
        # Metrics are unavailable: guide the user through enabling collection.
        if not hosts:
            data["need_access"] = False
            data["access_div_message"] = (
                u"检测到当前业务没有主机,请前往 %s 快速部署主机!"
                % href_link(u"Agent安装", AGENT_SETUP_URL))
            data["access_btn_message"] = u"确定"
        else:
            # Still return the hosts, just without any metric.
            data["hosts"] = _build_perf_host_list(
                hosts, default_status_info, hosts_agent_status)
            access_status = get_access_status(cc_biz_id)
            data["need_access"] = not access_status.get("accessed")
            if isinstance(e, SqlQueryException):
                logger.exception(u"数据平台查询失败:%s" % e)
                data["access_div_message"] = u"数据查询异常,请联系管理员"
            else:
                data["access_div_message"] = access_status.get("div_message")
            data["access_btn_message"] = access_status.get("btn_message")
        return render_json(failed_data(u"", data))
    except Exception as e:
        logger.exception(u"拉取主机性能信息失败: %s" % e)
        return render_json(failed_data(u"系统错误", data))
    finally:
        # Always emit the timing breakdown, whichever branch returned.
        ts.stop()
        time_stats_info = ts.display()
        logger.warning(time_stats_info)