Example #1
0
def get_agent_status(request, cc_biz_id):
    """Report the agent installation status of the hosts of one business.

    :param request: the incoming request (not read by this view)
    :param cc_biz_id: id of the business whose hosts are inspected
    :return: ``render_json`` response. On success ``data`` is the number of
        hosts whose agent status is ON and ``details`` holds both counters
        plus the ip / cc_plat_id lists; on failure ``data`` is an empty list.
    """
    all_hosts = CCBiz.hosts(cc_biz_id).get('data') or []
    status_by_host = CCBiz.agent_status(
        cc_biz_id, [host_key(item) for item in all_hosts])

    # Hosts exist but the status lookup came back empty: treat it as a
    # failed query rather than reporting zero agents.
    if all_hosts and len(status_by_host) == 0:
        return render_json({
            'result': False,
            'message': u"查询agent状态接口失败",
            'data': []
        })

    ok_ip_list = []
    fail_ip_list = []
    for host_id, agent_state in status_by_host.iteritems():
        ip, plat_id = parse_host_id(host_id)
        # Anything that is not explicitly ON is counted as a failure.
        if agent_state == AGENT_STATUS.ON:
            target = ok_ip_list
        else:
            target = fail_ip_list
        target.append({'ip': ip, 'cc_plat_id': plat_id})

    # One entry is appended per host, so the list lengths are the counters.
    agent_ok_cnt = len(ok_ip_list)
    agent_fail_cnt = len(fail_ip_list)

    return render_json({
        'result': True,
        'message': "",
        'data': agent_ok_cnt,
        'details': {
            'agent_ok_cnt': agent_ok_cnt,
            'agent_fail_cnt': agent_fail_cnt,
            'ok_ip_list': ok_ip_list,
            'fail_ip_list': fail_ip_list,
        },
    })
Example #2
0
def _build_host(raw_host, agent_status, cpu_usage_info,
                cpu_single_usage_info, cpu_load_info, io_util_info):
    """Wrap one raw host record in a ``Host`` carrying status and metrics.

    Each ``*_info`` argument is looked up with ``.get(host.id)``; pass an
    empty dict for a metric that could not be queried.

    :param raw_host: one host record taken from ``CCBiz.hosts(...)["data"]``
    :param agent_status: mapping of host id to agent status
    :param cpu_usage_info: mapping of host id to total CPU usage data
    :param cpu_single_usage_info: mapping of host id to per-core CPU usage data
    :param cpu_load_info: mapping of host id to CPU load data
    :param io_util_info: mapping of host id to IO utilisation data
    :return: the populated ``Host``
    """
    host = Host(raw_host)
    # Fresh containers for every host, so hosts never share mutable defaults.
    host.alarm = {'level': [0, 0, 0]}
    host.component = []
    host.is_stickied = MonitorHostSticky.objects.host_is_stickied(host.id)

    # Basic performance data.
    host.cpu_usage = cpu_usage_info.get(host.id)
    host.cpu_single_usage = cpu_single_usage_info.get(host.id)
    host.cpu_load = cpu_load_info.get(host.id)
    host.io_util = io_util_info.get(host.id)
    if host.io_util and isinstance(host.io_util.get("val"), list):
        # "val" is a list of dicts: merge them in order (later entries win
        # on duplicate keys) and keep only the largest merged value.
        merged = {}
        for reading in host.io_util["val"]:
            merged.update(reading)
        if merged:
            host.io_util["val"] = max(merged.values())
        else:
            # No readings at all: max() would raise ValueError and fail the
            # whole page. Report the metric as missing instead, the same
            # value used when the host has no IO entry.
            host.io_util = None

    host.status = agent_status.get(host.id, AGENT_STATUS.UNKNOWN)
    metrics = [
        host.cpu_usage, host.cpu_single_usage, host.cpu_load, host.io_util
    ]
    # The agent is up but not a single metric arrived.
    if host.status == AGENT_STATUS.ON and not any(metrics):
        host.status = AGENT_STATUS.NO_DATA

    # Flags that drive the front-end rendering.
    host.checked = False      # whether the row is selected
    host._is_show = True      # whether the row is shown (attribute grouping)
    return host


def index(request, cc_biz_id):
    """Basic performance page and its host data endpoint.

    GET renders the basic performance home page. Any other method (POST in
    practice) returns JSON with the performance info of every host of the
    business: host attributes plus the individual metrics.

    :param request: the incoming request
    :param cc_biz_id: id of the business being displayed
    :return: rendered template for GET, otherwise a ``render_json`` response
    """
    def get_access_status(cc_biz_id):
        """Get the data platform's basic-performance access progress.

        :return: {
            "accessed": True,
            "div_message": <message shown to the user>,
            "btn_message": <label of the confirm button>
        }
        """
        btn_message = u"确定"
        div_message = u"您的性能指标采集任务已下发正在进行中,请稍候再试!"
        # Step 1: look up the basic-performance data ids of this business.
        data_id_exist = any(get_bp_data_id(cc_biz_id))
        # Step 2: without any data id the business is reported as not
        # accessed and the user is invited to start the collection.
        if not data_id_exist:
            div_message = (u"检测到您的业务尚未开启监控,"
                           u"点击下面按钮开启主机指标采集,\n"
                           u"10-20分钟后刷新此页面即可查看到主机数据,请耐心等待!\n"
                           u"(未安装agent的主机,"
                           u"在主机详情页中按照指引完成部署并开启数据采集。)")
            btn_message = u"开始采集"
        return {
            "accessed": data_id_exist,
            "div_message": div_message,
            "btn_message": btn_message
        }

    data = {
        "hosts": [],
        "update_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    if request.method == "GET":
        # NOTE: locals() is handed to the template renderer, so every local
        # name defined up to this point (including the seemingly unused
        # ones below) may be read by the template. Do not rename or remove
        # them, and do not introduce new locals above this branch.
        count_per_page = 10
        # Cloud area (plat) information.
        plat_info = CCBiz.plat_info(cc_biz_id)
        checkhost_id = request.GET.get("alarm_strategy_id")
        return render_mako_context(request, '/monitor/performanceV2/home.html',
                                   locals())

    ts = TimeStats(u"get_host_info_list: %s" % cc_biz_id)
    # Fetch every host of the business (pagination happens in the front end).
    ts.split(u"获取主机列表")
    hosts = CCBiz.hosts(cc_biz_id).get("data") or []
    host_ids = set(host_key(h) for h in hosts)
    ts.split(u"获取主机agent状态")
    hosts_agent_status = CCBiz.agent_status(cc_biz_id, host_ids)
    try:
        # Total CPU usage.
        ts.split(u"获取主机cpu 总使用率")
        cpu_usage_info = HostIndex.get_host_performance(
            cc_biz_id,
            item="cpuusage",
            category="cpu",
            result_table_id="ja_gse_cpu_cpuusage")
        # CPU 5-minute load.
        ts.split(u"获取主机cpu 5分钟负载")
        cpu_load_info = HostIndex.get_host_performance(
            cc_biz_id,
            item="locdavg",
            category="cpu",
            result_table_id="ja_gse_cpu")
        # Per-core CPU usage.
        ts.split(u"获取主机cpu 单核使用率")
        cpu_single_usage_info = HostIndex.get_host_performance(
            cc_biz_id,
            item="cpuusage",
            category="cpu",
            result_table_id="ja_gse_cpu_core_cpuusage")
        # Disk usage is temporarily hidden and therefore not queried.
        # IO wait.
        ts.split(u"获取主机IO等待")
        io_util_info = HostIndex.get_host_performance(
            cc_biz_id,
            item="util",
            category="disk",
            result_table_id="ja_gse_disk_iostats")
        ts.split(u"处理整合所有数据")
        host_list = [
            _build_host(h, hosts_agent_status, cpu_usage_info,
                        cpu_single_usage_info, cpu_load_info, io_util_info)
            for h in hosts
        ]
        host_list.sort(key=lambda x: x.status, reverse=True)
        data["hosts"] = host_list
        return render_json(ok_data(data))
    except (TableNotExistException, SqlQueryException) as e:
        # The metrics could not be queried: guide the user through access.
        if not hosts:
            data["need_access"] = False
            data["access_div_message"] = (
                u"检测到当前业务没有主机,请前往 %s 快速部署主机!" %
                href_link(u"Agent安装", AGENT_SETUP_URL))
            data["access_btn_message"] = u"确定"
        else:
            # Still list the hosts, just without any metric; hosts whose
            # agent is ON therefore end up as NO_DATA.
            no_metrics = {}
            host_list = [
                _build_host(h, hosts_agent_status, no_metrics, no_metrics,
                            no_metrics, no_metrics)
                for h in hosts
            ]
            host_list.sort(key=lambda x: x.status, reverse=True)
            data["hosts"] = host_list
            access_status = get_access_status(cc_biz_id)
            data["need_access"] = not access_status.get("accessed")
            if isinstance(e, SqlQueryException):
                logger.exception(u"数据平台查询失败:%s" % e)
                data["access_div_message"] = u"数据查询异常,请联系管理员"
            else:
                data["access_div_message"] = access_status.get("div_message")
            data["access_btn_message"] = access_status.get("btn_message")
        return render_json(failed_data(u"", data))
    except Exception as e:
        logger.exception(u"拉取主机性能信息失败: %s" % e)
        return render_json(failed_data(u"系统错误", data))
    finally:
        # Always emit the timing breakdown, whichever branch returned.
        ts.stop()
        time_stats_info = ts.display()
        logger.warning(time_stats_info)