Ejemplo n.º 1
0
def update_node_storage(ipaddr, node_uuid):
    """
    Refresh the stored disk-usage and NIC-status records of one node.

    The node's monitor service is asked for disk usage first; every storage
    record of the node whose ``used`` or ``free`` value differs from the
    reported one is rewritten. Afterwards the network state is requested and
    every NIC record whose status differs from the reported one is rewritten.

    If a monitor reply carries a non-zero (or missing) ``code`` the function
    logs an error and returns at once, so a failed disk query also skips the
    NIC refresh.

    :param ipaddr: address of the node, handed to ``monitor_post``
    :param node_uuid: uuid used to select the node's storage and NIC records
    :return: None
    """
    logger.debug("update node %s storages", ipaddr)
    rep_json = monitor_post(ipaddr, "/api/v1/monitor/disk", None)
    # A reply without "code" counts as a failure instead of raising KeyError.
    if rep_json.get("code", -1) != 0:
        logger.error("get node:%s storage info fail", ipaddr)
        return

    # Index the records by path once; setdefault keeps the first record for
    # a path, which is the one a linear scan would have picked.
    storage_by_path = {}
    for storage in db_api.get_node_storage_all({"node_uuid": node_uuid}):
        storage_by_path.setdefault(storage.path, storage)

    data = rep_json.get("data", {})
    logger.debug("disk usage info:%s", rep_json)
    # These mount points are never synchronised into the storage records.
    ignored_paths = ('/', '/home', '/boot', '/boot/efi')
    for path, usage in data.items():
        if not isinstance(usage, dict) or path in ignored_paths:
            continue
        storage = storage_by_path.get(path)
        if storage is None:
            continue
        if (storage.used != int(usage['used'])
                or storage.free != int(usage['free'])):
            storage.used = usage['used']
            storage.free = usage['free']
            storage.total = usage['total']
            storage.soft_update()
            logger.info("update node %s path %s usage, value:%s",
                        ipaddr, path, usage)
    logger.debug("update node %s storages end", ipaddr)

    # From here on: refresh the NIC status records.
    rep_json = monitor_post(ipaddr, 'api/v1/monitor/network', {})
    if rep_json.get("code", -1) != 0:
        logger.error("get node:%s network info fail", ipaddr)
        return

    nic_info = rep_json.get("data", {})
    nic_by_name = {}
    for nic in db_api.get_nics_all({"node_uuid": node_uuid}):
        nic_by_name.setdefault(nic.nic, nic)

    for name, info in nic_info.items():
        if not isinstance(info, dict):
            continue
        nic = nic_by_name.get(name)
        if nic is None:
            continue
        # Status 2 is stored when the monitor reports a truthy "stat",
        # status 1 otherwise.
        wanted_status = 2 if info['stat'] else 1
        if nic.status != wanted_status:
            nic.status = wanted_status
            nic.soft_update()
            logger.info("update node %s nic %s status, value:%s",
                        ipaddr, name, info['stat'])
    logger.debug("update node %s nic status end", ipaddr)
    return
Ejemplo n.º 2
0
def update_node_performance():
    """
    Collect one performance sample from every active node and store it.

    For each non-deleted node whose status is ``constants.STATUS_ACTIVE`` the
    monitor endpoint ``resource_perf_for_database`` is queried with a
    ``statis_period`` of 30. A reply with ``code == 0`` is stored through
    ``db_api.add_monitor_half_min`` together with the node uuid and the
    timestamp taken from the reply's ``utc`` field (0 when absent).

    Any exception raised while handling one node is logged and does not stop
    the processing of the remaining nodes.

    :return: None
    """
    nodes = db_api.get_node_with_all({'deleted': False})
    for node in nodes:
        try:
            if node.status != constants.STATUS_ACTIVE:
                continue

            ret = monitor_post(
                node.ip, 'api/v1/monitor/resource_perf_for_database',
                {'statis_period': 30})
            if ret.get('code') != 0:
                logger.error(
                    "monitor server error, node_ip:{}, ret: {}".format(
                        node.ip, ret))
                continue

            ret_data = ret.get("data", {})
            node_utc = ret_data.get("utc", 0)
            insert_data = {
                "node_uuid": node.uuid,
                "node_datetime": dt.datetime.fromtimestamp(node_utc),
                "monitor_info": json.dumps(ret_data)
            }
            db_api.add_monitor_half_min(insert_data)
            # Report success only once the insert has actually returned.
            logger.debug(
                "insert monitor performance data success, node_ip: {}, data: {}"
                .format(node.ip, ret))
        except Exception as e:
            # Deliberately broad: one failing node must not abort the loop.
            logger.error("get node performance error:%s", e, exc_info=True)
Ejemplo n.º 3
0
    def get_spice_link(self, host_ports):
        """Look up the monitor-reported status of spice ports, host by host.

        ``host_ports`` maps a host address to a list of port strings, e.g.::

            {"172.16.1.30": ["5901", "5902"], "172.16.1.31": ["5901"]}

        The result maps every host that had at least one port to a dict of
        ``port -> status``, e.g. ``{"172.16.1.30": {"5901": True, ...}}``.
        Hosts with an empty port list are omitted. A port missing from the
        monitor reply, or any port of a host whose reply has a non-zero
        ``code``, is reported as ``False``.
        """
        link_map = {}
        for host, port_list in host_ports.items():
            if not port_list:
                continue
            link_map[host] = {}
            joined_ports = ",".join(port_list)
            reply = monitor_post(host, "/api/v1/monitor/port_status",
                                 {"ports": joined_ports})
            if reply.get("code", -1) != 0:
                logger.error("monitor %s %s spice ports return: %s" %
                             (host, joined_ports, reply))
                status_by_port = {}
            else:
                status_by_port = reply["data"]
            link_map[host].update(
                {port: status_by_port.get(port, False) for port in port_list})

        return link_map
Ejemplo n.º 4
0
    def get_links_num(self):
        """Count the spice ports that the monitor services report as linked.

        The spice ports of all instances are grouped by host address; each
        host that has at least one port is queried once through its monitor
        ``port_status`` endpoint, and every truthy entry in the reply's
        ``data`` adds one to the total. Hosts without any spice port are
        skipped without a request (and without an error log entry). A host
        whose reply has a non-zero ``code`` is logged as an error and
        contributes nothing.

        :return: total number of truthy port entries over all hosts
        """
        ports_by_host = {}
        for instance in db_api.get_instance_with_all({}):
            if instance.spice_port:
                # A set removes duplicate ports before they are sent.
                ports_by_host.setdefault(instance.host.ip,
                                         set()).add(instance.spice_port)

        count = 0
        for host_ip, host_ports in ports_by_host.items():
            ports_status = monitor_post(host_ip,
                                        "/api/v1/monitor/port_status",
                                        {"ports": ",".join(host_ports)})
            if ports_status.get('code', -1) != 0:
                logger.error("from node %s get port status:%s", host_ip,
                             ports_status)
                continue
            logger.info("from node %s get port status:%s", host_ip,
                        ports_status)
            count += sum(
                1 for link in ports_status.get("data", {}).values() if link)
        logger.info("the instance link count:%s", count)
        return count
Ejemplo n.º 5
0
def _notify_terminal_instance_closed(instance):
    """Notify the terminal bound to *instance* that its desktop was closed.

    The desktop record is looked up through a different ``db_api`` call
    depending on ``instance.classify`` (2 uses the personal-desktop lookup).
    When no desktop is found nothing happens. When the notification returns
    a truthy value, ``instance.terminal_mac`` is cleared in memory; the
    caller is responsible for persisting the instance.
    """
    if instance.classify == 2:
        desktop = db_api.get_personal_desktop_with_first(
            {'uuid': instance.desktop_uuid})
    else:
        desktop = db_api.get_desktop_by_uuid(
            desktop_uuid=instance.desktop_uuid)
    if not desktop:
        return

    data = {
        'desktop_name': desktop.name,
        'desktop_order': desktop.order_num,
        'desktop_uuid': desktop.uuid,
        'instance_uuid': instance.uuid,
        'instance_name': instance.name,
        'host_ip': instance.host.ip,
        'port': instance.spice_port,
        'token': instance.spice_token,
        'os_type': desktop.os_type,
        'terminal_mac': instance.terminal_mac
    }
    logger.info('rtn: instance.classify: %s, data: %s', instance.classify,
                data)
    ret = BaseController().notice_terminal_instance_close(data)

    # Once the terminal has been told, drop the desktop/terminal binding.
    if ret:
        try:
            instance.terminal_mac = None
        except Exception as e:
            logger.error(
                "update instance.terminal_mac to None: %s failed: %s",
                instance.uuid, e)

    logger.info('rtn: %s, desktop.uuid: %s, instance.terminal_mac: %s', ret,
                desktop.uuid, instance.terminal_mac)


def _apply_domain_state(instance, state):
    """Bring ``instance.status`` in line with the domain *state* reported.

    * running: an inactive instance becomes active and is saved.
    * shutdown / shutoff: an active instance becomes inactive, a bound
      terminal is notified, and the instance is saved in every case.
    * any other state: the instance is left untouched.
    """
    if state == constants.DOMAIN_STATE['running']:
        if instance.status == constants.STATUS_INACTIVE:
            instance.status = constants.STATUS_ACTIVE
            instance.soft_update()
    elif state in (constants.DOMAIN_STATE['shutdown'],
                   constants.DOMAIN_STATE['shutoff']):
        if instance.status == constants.STATUS_ACTIVE:
            instance.status = constants.STATUS_INACTIVE
        # Only instances bound to a terminal trigger a notification.
        if instance.terminal_mac:
            _notify_terminal_instance_closed(instance)
        instance.soft_update()


def _refresh_spice_links(host_ip, host_instances):
    """Update ``spice_link`` for the instances running on *host_ip*.

    Only the ports of this host's instances are sent to the host's monitor
    service. An instance whose port is not reported as linked also gets
    ``allocated`` reset to 0.
    """
    ports = ",".join(
        {inst.spice_port for inst in host_instances if inst.spice_port})
    if ports:
        ports_status = monitor_post(host_ip, "/api/v1/monitor/port_status",
                                    {"ports": ports})
    else:
        ports_status = {}
    logger.info("from node %s get port status:%s", host_ip, ports_status)

    link_by_port = ports_status.get("data", {})
    for instance in host_instances:
        if instance.spice_port:
            instance.spice_link = link_by_port.get(instance.spice_port,
                                                   False)
            if not instance.spice_link:
                instance.allocated = 0
            instance.soft_update()
        logger.debug("the instance %s spice_link:%s", instance.uuid,
                     instance.spice_link)


def update_instance_info():
    """Synchronise the status and spice-link state of every instance.

    Instances are grouped by the address of their host. For each host:

    1. the compute service is asked (``get_status_many``) for the state of
       the host's instances;
    2. if the reply code is 80000 (compute service on the node could not be
       reached) every instance of the host that is not already inactive is
       set to inactive; for any non-zero code the host is then skipped;
    3. otherwise each reported state is applied to the matching instance;
    4. finally the host's monitor service is asked which spice ports are
       linked and the instances' ``spice_link`` / ``allocated`` fields are
       updated.

    :return: None
    """
    instances = db_api.get_instance_with_all({})
    instance_by_uuid = {}
    instances_by_host = {}
    for instance in instances:
        instance_by_uuid[instance.uuid] = instance
        instances_by_host.setdefault(instance.host.ip, []).append(instance)

    for host_ip, host_instances in instances_by_host.items():
        command_data = {
            "command": "get_status_many",
            "handler": "InstanceHandler",
            "data": {
                "instance": [{
                    "uuid": inst.uuid,
                    "name": inst.name
                } for inst in host_instances]
            }
        }
        logger.debug("get instance state in node %s", host_ip)
        rep_json = compute_post(host_ip, command_data)
        logger.debug("from compute get rep_json:{}".format(rep_json))

        code = rep_json.get("code", -1)
        if code != 0:
            # Compute service unreachable: mark the host's desktops as off.
            if code == 80000:
                for instance in host_instances:
                    if instance.status != constants.STATUS_INACTIVE:
                        instance.update(
                            {"status": constants.STATUS_INACTIVE})
                        logger.info(
                            "compute service unavaiable at node: %s, update instance.status to inactive: %s",
                            host_ip, instance.uuid)
            continue

        for item in rep_json.get("data", []):
            # Direct uuid lookup instead of scanning all instances per item.
            instance = instance_by_uuid.get(item["uuid"])
            if instance is None:
                continue
            _apply_domain_state(instance, item.get("state"))
            logger.debug("the instance %s state %s", instance.uuid,
                         item.get('state', 0))

        _refresh_spice_links(host_ip, host_instances)
Ejemplo n.º 6
0
def _probe_node_services(node):
    """Query the node's monitor service and derive a status from the replies.

    Memory and CPU figures are written onto *node* and saved; the service
    list is passed to ``update_service_status``. Exceptions are not caught
    here - the caller treats them as an error status.

    :return: ``constants.STATUS_ERROR`` when the memory or service request
        fails, CPU utilisation is at least 95, or a service required for the
        node's role is not running; ``constants.STATUS_ACTIVE`` otherwise
    """
    ret = monitor_post(node.ip, 'api/v1/monitor/memory', {})
    if ret.get('code') != 0:
        return constants.STATUS_ERROR

    status = constants.STATUS_ACTIVE
    mem_info = ret['data']
    # Scaled down by 1024**3 (presumably bytes -> GiB; confirm with monitor).
    node.running_mem = mem_info["available"] / 1024 / 1024 / 1024
    node.total_mem = mem_info['total'] / 1024 / 1024 / 1024
    node.mem_utilization = mem_info["utilization"]

    # A failed CPU request is tolerated: the ratio simply stays at 0.
    cpu_ratio = 0
    ret = monitor_post(node.ip, 'api/v1/monitor/cpu', {})
    if ret.get('code') == 0:
        cpu_ratio = ret['data']["utilization"]
        node.cpu_utilization = cpu_ratio
    node.soft_update()
    if cpu_ratio >= 95:
        status = constants.STATUS_ERROR

    ret = monitor_post(node.ip, 'api/v1/monitor/service', {})
    if ret.get('code') != 0:
        return constants.STATUS_ERROR

    services = ret['data']
    not_running_services = [
        name for name, state in services.items() if state != 'running'
    ]
    if node.type in [
            constants.ROLE_MASTER_AND_COMPUTE, constants.ROLE_MASTER
    ]:
        node_services = constants.MASTER_SERVICE
    elif node.type in [
            constants.ROLE_SLAVE_AND_COMPUTE, constants.ROLE_COMPUTE
    ]:
        node_services = constants.COMPUTE_SERVICE
    else:
        node_services = []
    update_service_status(node, services, node_services)

    for service in not_running_services:
        if service in node_services:
            logger.error("service %s is not running", service)
            return constants.STATUS_ERROR
    return status


def update_node_status():
    """Re-evaluate the status of every non-deleted node and persist changes.

    ``update_ha_master`` runs first: with HA enabled the type of the master
    and backup control nodes is dynamic, so it has to be correct before the
    per-node checks. Then, for every node that is not being deleted:

    * a node in RESTARTING state whose last update is at most 120 seconds
      old is given a grace period - it is skipped while it does not answer
      ping, and errors found afterwards are not recorded for it;
    * a node that does not answer ping is considered SHUTDOWN (unless it is
      inside the restart grace period);
    * a reachable node is probed through its monitor service
      (see ``_probe_node_services``); any exception counts as ERROR;
    * a changed status is saved, except that an ERROR found while the node
      is restarting or shutting down is ignored for node types other than
      1 and 3;
    * unless the node is SHUTDOWN, its storage and NIC records are refreshed.

    :return: None
    """
    update_ha_master()

    nodes = db_api.get_node_with_all({'deleted': False})
    for node in nodes:
        if node.status == constants.STATUS_DELETING:
            continue
        is_shutdowning = node.status == constants.STATUS_SHUTDOWNING
        is_restart = False
        logger.debug("node %s updateing", node.name)
        status = constants.STATUS_ACTIVE

        if node.status == constants.STATUS_RESTARTING:
            # NOTE(review): assumes node.updated_at is a naive UTC datetime.
            elapsed = datetime.datetime.utcnow() - node.updated_at
            # total_seconds() covers the whole interval; parsing the text
            # form of the timedelta only yields its seconds field (< 60).
            if elapsed.total_seconds() <= 120:
                if not icmp_ping(node.ip, count=2):
                    continue
                is_restart = True

        if not icmp_ping(node.ip, count=3):
            if not is_restart:
                status = constants.STATUS_SHUTDOWN
        else:
            try:
                status = _probe_node_services(node)
            except Exception as e:
                logger.error("get service status error:%s", e, exc_info=True)
                status = constants.STATUS_ERROR

        if node.status != status:
            in_transition = is_restart or is_shutdowning
            if (status == constants.STATUS_ERROR and in_transition
                    and node.type not in [1, 3]):
                continue
            logger.info("node %s status change from %s to %s", node.ip,
                        node.status, status)
            node.status = status
            node.soft_update()

        # As long as the node is not shut down, the monitor service can be
        # asked for its disk usage.
        if status != constants.STATUS_SHUTDOWN:
            update_node_storage(node.ip, node.uuid)
Ejemplo n.º 7
0
 def get_voi_data(self, data):
     """Gather CPU, memory, SSD-disk and NIC throughput figures of one node.

     The node is the first ``YzyNodes`` row whose ``type`` is 1 or 3. Its
     monitor service is queried for CPU, memory, disk and network I/O; the
     network counters are sampled twice (at the start and after the other
     queries plus a 0.8 s pause) and the difference of the management NIC's
     byte counters is reported.

     :param data: not used by this method
     :return: the ``get_error_result("Success")`` dict with a ``data`` entry
         holding ``cpu_util``, ``memory_util``, ``disk_util`` and
         ``nic_util``; ``get_error_result("OtherError")`` when no such node
         exists or any step raises
     """
     logger.info("get node voi server data")
     node = db.session.query(YzyNodes).filter(YzyNodes.type.in_(
         [1, 3])).first()
     if node is None:
         # Without a matching node there is nothing to query.
         logger.error("get node voi server data fail %s",
                      "no node with type 1 or 3")
         return get_error_result("OtherError")

     req_data = {}
     req_cpu_url = '/api/v1/monitor/cpu'
     req_memory_url = '/api/v1/monitor/memory'
     req_disk_url = '/api/v1/monitor/disk'
     req_nic_url = '/api/v1/monitor/networkio'
     node_cpu_info = {}
     node_memory_info = {}
     node_disk_info = {}
     node_nic_info = {}
     try:
         # First network sample; the second one is taken further below.
         old_nic_ret = monitor_post(node.ip, req_nic_url, req_data)

         cpu_data = monitor_post(node.ip, req_cpu_url, req_data)['data']
         memory_data = monitor_post(node.ip, req_memory_url,
                                    req_data)['data']
         node_cpu_info['numbers'] = cpu_data['numbers']
         node_cpu_info['utilization'] = cpu_data['utilization']
         # NOTE(review): 'free' is computed as total - available, which
         # looks like the used amount - confirm what consumers expect.
         node_memory_info['free'] = memory_data['total'] - memory_data[
             'available']
         node_memory_info['total'] = memory_data['total']
         node_memory_info['available'] = memory_data['available']
         node_memory_info['utilization'] = memory_data['utilization']

         disk_data = monitor_post(node.ip, req_disk_url, req_data)["data"]
         storages = db_api.get_node_storage_all({'node_uuid': node.uuid})
         ssd_total = ssd_used = ssd_free = 0
         for storage in storages:
             # storage.type: 1-ssd  2-sata; only ssd paths are summed up.
             if storage.type == 1 and storage.path in disk_data:
                 logger.debug(storage.path)
                 usage = disk_data[storage.path]
                 ssd_total += usage["total"]
                 ssd_used += usage["used"]
                 ssd_free += usage["free"]
         node_disk_info['ratio'] = float(
             '%0.2f' % (ssd_used / ssd_total * 100)) if ssd_total else 0
         node_disk_info['total'] = ssd_total
         node_disk_info['used'] = ssd_used
         node_disk_info['free'] = ssd_free

         # Let some traffic accumulate between the two network samples.
         time.sleep(0.8)
         nic_ret = monitor_post(node.ip, req_nic_url, req_data)
         manage_network_name = db_api.get_node_manage_nic_name(node.uuid)
         logger.debug(manage_network_name)
         if manage_network_name and manage_network_name in nic_ret["data"]:
             new_io = nic_ret["data"][manage_network_name]
             old_io = old_nic_ret["data"][manage_network_name]
             node_nic_info['bytes_send'] = new_io["bytes_send"] - old_io[
                 "bytes_send"]
             node_nic_info['bytes_recv'] = new_io["bytes_recv"] - old_io[
                 "bytes_recv"]
     except Exception as e:
         logger.error("get node voi server data fail %s", e, exc_info=True)
         return get_error_result("OtherError")

     resp = get_error_result("Success")
     resp['data'] = {
         "cpu_util": node_cpu_info,
         'memory_util': node_memory_info,
         'disk_util': node_disk_info,
         'nic_util': node_nic_info,
     }
     return resp