def update_node_storage(ipaddr, node_uuid):
    """Refresh a node's storage usage and NIC status from its monitor service.

    First asks the monitor service on ``ipaddr`` for disk usage and updates
    the storage records of ``node_uuid`` whose ``used``/``free`` values
    changed. Then asks for network information and updates NIC records whose
    status differs from the reported ``stat`` flag.

    A failed disk query logs an error and returns immediately, so the NIC
    records are not touched in that case. A failed network query likewise
    logs an error and returns.

    Args:
        ipaddr: Node address handed to ``monitor_post``.
        node_uuid: UUID used to look up the node's storage and NIC records.

    Returns:
        None.
    """
    # Mount points that are never tracked as node storage.
    ignored_paths = ('/', '/home', '/boot', '/boot/efi')

    logger.debug("update node %s storages", ipaddr)
    rep_json = monitor_post(ipaddr, "/api/v1/monitor/disk", None)
    # .get() so that a reply without "code" counts as a failure rather than
    # raising KeyError (same convention as the other monitor callers).
    if rep_json.get("code", -1) != 0:
        logger.error("get node:%s storage info fail", ipaddr)
        return

    # Index records by path; setdefault keeps the first record per path,
    # which is the one a linear scan with ``break`` would have picked.
    storage_by_path = {}
    for storage in db_api.get_node_storage_all({"node_uuid": node_uuid}):
        storage_by_path.setdefault(storage.path, storage)

    data = rep_json.get("data") or {}
    logger.debug("disk usage info:%s", rep_json)
    for path, usage in data.items():
        if not isinstance(usage, dict) or path in ignored_paths:
            continue
        storage = storage_by_path.get(path)
        if storage is None:
            continue
        if (storage.used != int(usage['used'])
                or storage.free != int(usage['free'])):
            storage.used = usage['used']
            storage.free = usage['free']
            storage.total = usage['total']
            storage.soft_update()
            logger.info("update node %s path %s usage, value:%s",
                        ipaddr, path, usage)
    logger.debug("update node %s storages end", ipaddr)

    # Below: refresh the NIC information.
    rep_json = monitor_post(ipaddr, 'api/v1/monitor/network', {})
    if rep_json.get("code", -1) != 0:
        logger.error("get node:%s network info fail", ipaddr)
        return

    data = rep_json.get("data") or {}
    nic_by_name = {}
    for nic in db_api.get_nics_all({"node_uuid": node_uuid}):
        nic_by_name.setdefault(nic.nic, nic)

    for name, info in data.items():
        if not isinstance(info, dict):
            continue
        nic = nic_by_name.get(name)
        if nic is None:
            continue
        # A truthy "stat" maps to status 2, a falsy one to status 1.
        # NOTE(review): presumably 2 = link up and 1 = link down -- confirm
        # against the NIC model.
        new_status = 2 if info['stat'] else 1
        if nic.status != new_status:
            nic.status = new_status
            nic.soft_update()
            logger.info("update node %s nic %s status, value:%s",
                        ipaddr, name, info['stat'])
    logger.debug("update node %s nic status end", ipaddr)
    return
def update_node_performance():
    """Store one performance sample for every active, non-deleted node.

    For each node whose status is ``constants.STATUS_ACTIVE`` the monitor
    service is asked for ``resource_perf_for_database`` data (with
    ``statis_period`` 30). On a ``code == 0`` reply the payload is serialised
    to JSON and inserted through ``db_api.add_monitor_half_min``.

    Any exception raised while handling one node is logged and does not stop
    the remaining nodes from being processed.

    Returns:
        None.
    """
    nodes = db_api.get_node_with_all({'deleted': False})
    for node in nodes:
        try:
            if node.status != constants.STATUS_ACTIVE:
                continue

            ret = monitor_post(
                node.ip, 'api/v1/monitor/resource_perf_for_database',
                {'statis_period': 30})
            if ret.get('code') != 0:
                logger.error(
                    "monitor server error, node_ip:{}, ret: {}".format(
                        node.ip, ret))
                continue

            ret_data = ret.get("data", {})
            node_utc = ret_data.get("utc", 0)
            # NOTE(review): fromtimestamp() converts to local time even
            # though the field is named "utc" -- confirm this is intended.
            insert_data = {
                "node_uuid": node.uuid,
                "node_datetime": dt.datetime.fromtimestamp(node_utc),
                "monitor_info": json.dumps(ret_data)
            }
            db_api.add_monitor_half_min(insert_data)
            # Report success only after the insert has actually happened.
            logger.debug(
                "insert monitor performance data success, node_ip: {}, data: {}"
                .format(node.ip, ret))
        except Exception as e:
            logger.error("update node performance error:%s", e, exc_info=True)
def get_spice_link(self, host_ports):
    """Look up the monitor-reported status of SPICE ports on each host.

    Example input::

        {
            "172.16.1.30": ["5901", "5902"],
            "172.16.1.31": ["5901", "5902"],
            "172.16.1.32": ["5901", "5902"]
        }

    Example output::

        {
            "172.16.1.39": {"5901": True}
        }

    Hosts with an empty port list are left out of the result. When the
    monitor call does not return ``code == 0`` an error is logged and every
    requested port of that host is reported as ``False``; ports missing from
    the monitor reply are also reported as ``False``.

    Args:
        host_ports: Mapping of host address to a list of port strings.

    Returns:
        dict mapping each queried host to ``{port: status}``.
    """
    result = {}
    for host, port_list in host_ports.items():
        if not port_list:
            continue
        result[host] = {}
        joined_ports = ",".join(port_list)
        ret = monitor_post(host, "/api/v1/monitor/port_status",
                           {"ports": joined_ports})
        if ret.get("code", -1) != 0:
            logger.error("monitor %s %s spice ports return: %s" %
                         (host, joined_ports, ret))
            port_state = {}
        else:
            port_state = ret["data"]
        for port in port_list:
            result[host][port] = port_state.get(port, False)
    return result
def get_links_num(self):
    """Count the SPICE ports that the monitor services report as linked.

    The SPICE ports of all instances are grouped by host address; each host
    that has at least one port is asked for the status of its ports, and
    every truthy status in the reply adds one to the total.

    Hosts without any SPICE port are skipped without a monitor call and
    without an error log entry. Hosts whose monitor call does not return
    ``code == 0`` are logged as errors and contribute nothing.

    Returns:
        int: Number of ports reported with a truthy status.
    """
    ports_by_host = {}
    for instance in db_api.get_instance_with_all({}):
        if instance.spice_port:
            ports_by_host.setdefault(instance.host.ip,
                                     set()).add(instance.spice_port)

    count = 0
    for host_ip, host_ports in ports_by_host.items():
        # Sorted so the request payload is deterministic between runs.
        ports = ",".join(sorted(host_ports))
        ports_status = monitor_post(host_ip, "/api/v1/monitor/port_status",
                                    {"ports": ports})
        if ports_status.get('code', -1) != 0:
            logger.error("from node %s get port status:%s", host_ip,
                         ports_status)
            continue
        logger.info("from node %s get port status:%s", host_ip, ports_status)
        count += sum(
            1 for link in ports_status.get("data", {}).values() if link)

    logger.info("the instance link count:%s", count)
    return count
def _notify_terminal_instance_closed(instance):
    """Notify terminal management that ``instance`` closed and unbind it on success."""
    if instance.classify == 2:
        desktop = db_api.get_personal_desktop_with_first(
            {'uuid': instance.desktop_uuid})
    else:
        desktop = db_api.get_desktop_by_uuid(
            desktop_uuid=instance.desktop_uuid)
    if not desktop:
        return

    data = {
        'desktop_name': desktop.name,
        'desktop_order': desktop.order_num,
        'desktop_uuid': desktop.uuid,
        'instance_uuid': instance.uuid,
        'instance_name': instance.name,
        'host_ip': instance.host.ip,
        'port': instance.spice_port,
        'token': instance.spice_token,
        'os_type': desktop.os_type,
        'terminal_mac': instance.terminal_mac
    }
    logger.info('rtn: instance.classify: %s, data: %s' %
                (instance.classify, data))
    ret = BaseController().notice_terminal_instance_close(data)
    # Once the notification went through, clear the desktop/terminal binding.
    if ret:
        try:
            instance.terminal_mac = None
        except Exception as e:
            logger.error(
                "update instance.terminal_mac to None: %s failed: %s",
                instance.uuid, e)
    logger.info('rtn: %s, desktop.uuid: %s, instance.terminal_mac: %s' %
                (ret, desktop.uuid, instance.terminal_mac))


def update_instance_info():
    """Synchronise instance power state and SPICE link state with the nodes.

    Instances are grouped by host address. For every host:

    1. The compute service is asked (``get_status_many``) for the state of
       the host's instances. On reply code 80000 every instance of that
       host not already inactive is set to ``constants.STATUS_INACTIVE``;
       on any non-zero code the host is skipped for this run.
    2. Instances reported as running are switched from inactive to active;
       instances reported as shutdown/shutoff are switched from active to
       inactive and, if bound to a terminal, terminal management is notified.
    3. The monitor service is asked for the status of the host's SPICE
       ports; each instance's ``spice_link`` is updated and ``allocated`` is
       reset to 0 when the link is not reported.

    Returns:
        None.
    """
    instances = db_api.get_instance_with_all({})
    instance_dict = {}
    instances_by_host = {}
    for instance in instances:
        instance_dict[instance.uuid] = instance
        instances_by_host.setdefault(instance.host.ip, []).append(instance)

    running_states = (constants.DOMAIN_STATE['running'],)
    stopped_states = (constants.DOMAIN_STATE['shutdown'],
                      constants.DOMAIN_STATE['shutoff'])

    for host_ip, host_instances in instances_by_host.items():
        command_data = {
            "command": "get_status_many",
            "handler": "InstanceHandler",
            "data": {
                "instance": [{"uuid": inst.uuid, "name": inst.name}
                             for inst in host_instances]
            }
        }
        logger.debug("get instance state in node %s", host_ip)
        rep_json = compute_post(host_ip, command_data)
        logger.debug("from compute get rep_json:{}".format(rep_json))

        code = rep_json.get("code", -1)
        if code != 0:
            # If the node's compute service cannot be reached, mark all of
            # its desktops as powered off.
            if code == 80000:
                for inst in host_instances:
                    if inst.status != constants.STATUS_INACTIVE:
                        inst.update({"status": constants.STATUS_INACTIVE})
                        logger.info(
                            "compute service unavaiable at node: %s, update instance.status to inactive: %s",
                            host_ip, inst.uuid)
            # NOTE(review): every failed compute reply skips the rest of the
            # processing for this host -- confirm that other error codes are
            # meant to skip the SPICE port check as well.
            continue

        for item in rep_json.get("data", []):
            # Direct lookup instead of scanning the full instance list.
            instance = instance_dict.get(item["uuid"])
            if instance is None:
                continue
            state = item.get("state")
            if state in running_states:
                if instance.status == constants.STATUS_INACTIVE:
                    instance.status = constants.STATUS_ACTIVE
                    instance.soft_update()
            elif state in stopped_states:
                if instance.status == constants.STATUS_ACTIVE:
                    instance.status = constants.STATUS_INACTIVE
                    # Tell terminal management that the desktop closed; only
                    # desktops bound to a terminal are notified.
                    if instance.terminal_mac:
                        _notify_terminal_instance_closed(instance)
                    instance.soft_update()
            logger.debug("the instance %s state %s", instance.uuid,
                         item.get('state', 0))

        # Query the monitor service only for the ports used on this host.
        port_instances = [inst for inst in host_instances if inst.spice_port]
        ports = ",".join(sorted({inst.spice_port for inst in port_instances}))
        if ports:
            ports_status = monitor_post(host_ip,
                                        "/api/v1/monitor/port_status",
                                        {"ports": ports})
        else:
            ports_status = {}
        logger.info("from node %s get port status:%s", host_ip, ports_status)

        # NOTE(review): a failed monitor reply has no usable "data", so all
        # links on the host are treated as down and ``allocated`` is reset --
        # confirm this is the desired behaviour during a monitor outage.
        link_status = ports_status.get("data", {})
        for inst in port_instances:
            inst.spice_link = link_status.get(inst.spice_port, False)
            if not inst.spice_link:
                inst.allocated = 0
            inst.soft_update()
            logger.debug("the instance %s spice_link:%s", inst.uuid,
                         inst.spice_link)
def _check_node_services(node):
    """Probe memory, CPU and services of ``node`` via monitor; return the resulting status."""
    status = constants.STATUS_ACTIVE

    ret = monitor_post(node.ip, 'api/v1/monitor/memory', {})
    if ret.get('code') != 0:
        return constants.STATUS_ERROR
    mem_info = ret['data']
    node.running_mem = mem_info["available"] / 1024 / 1024 / 1024
    node.total_mem = mem_info['total'] / 1024 / 1024 / 1024
    node.mem_utilization = mem_info["utilization"]

    ret = monitor_post(node.ip, 'api/v1/monitor/cpu', {})
    cpu_ratio = 0
    if ret.get('code') == 0:
        cpu_info = ret['data']
        cpu_ratio = cpu_info["utilization"]
        node.cpu_utilization = cpu_info["utilization"]
    node.soft_update()
    if cpu_ratio >= 95:
        status = constants.STATUS_ERROR

    ret = monitor_post(node.ip, 'api/v1/monitor/service', {})
    if ret.get('code') != 0:
        return constants.STATUS_ERROR
    services = ret['data']

    if node.type in [constants.ROLE_MASTER_AND_COMPUTE, constants.ROLE_MASTER]:
        node_services = constants.MASTER_SERVICE
    elif node.type in [constants.ROLE_SLAVE_AND_COMPUTE,
                       constants.ROLE_COMPUTE]:
        node_services = constants.COMPUTE_SERVICE
    else:
        node_services = []
    update_service_status(node, services, node_services)

    # One required service that is not running is enough to flag the node.
    for service, state in services.items():
        if state != 'running' and service in node_services:
            logger.error("service %s is not running", service)
            status = constants.STATUS_ERROR
            break
    return status


def update_node_status():
    """Recompute and persist the status of every non-deleted node.

    For each node (nodes in ``STATUS_DELETING`` are skipped):

    * A node in ``STATUS_RESTARTING`` gets a 120 second grace window counted
      from ``node.updated_at``. Inside the window an unreachable node is left
      untouched; after the window it is evaluated like any other node.
    * A node that does not answer ping becomes ``STATUS_SHUTDOWN`` (unless it
      is inside the restart grace window).
    * A reachable node is probed through the monitor service; failed
      memory/service queries, CPU utilisation of 95 or more, a required
      service that is not running, or any exception yield ``STATUS_ERROR``.
    * An ``STATUS_ERROR`` result is ignored for restarting or shutting-down
      nodes whose ``type`` is not 1 or 3.
    * Unless the node ended up as shut down, its storage and NIC information
      is refreshed via ``update_node_storage``.

    Returns:
        None.
    """
    restart_grace_seconds = 120

    # With HA enabled the master/backup node types are dynamic, so refresh
    # the HA information first to make sure node types are correct.
    update_ha_master()
    nodes = db_api.get_node_with_all({'deleted': False})
    for node in nodes:
        if node.status == constants.STATUS_DELETING:
            continue
        is_shutdowning = node.status == constants.STATUS_SHUTDOWNING
        is_restart = False

        logger.debug("node %s updateing", node.name)
        status = constants.STATUS_ACTIVE

        if node.status == constants.STATUS_RESTARTING:
            elapsed = (datetime.datetime.utcnow() -
                       node.updated_at).total_seconds()
            # total_seconds() measures the whole interval; parsing the
            # seconds field out of str(timedelta) never exceeds 59, which
            # made the grace window last forever.
            if elapsed <= restart_grace_seconds:
                if not icmp_ping(node.ip, count=2):
                    continue
                is_restart = True

        if not icmp_ping(node.ip, count=3):
            if not is_restart:
                status = constants.STATUS_SHUTDOWN
        else:
            try:
                status = _check_node_services(node)
            except Exception as e:
                logger.error("get service status error:%s", e, exc_info=True)
                status = constants.STATUS_ERROR

        if node.status != status:
            # Services may legitimately be down while a non-master node is
            # restarting or shutting down; keep its transitional status.
            # NOTE(review): types 1 and 3 look like the master roles --
            # confirm against the constants module.
            if (status == constants.STATUS_ERROR
                    and (is_restart or is_shutdowning)
                    and node.type not in [1, 3]):
                continue
            logger.info("node %s status change from %s to %s", node.ip,
                        node.status, status)
            node.status = status
            node.soft_update()

        # As long as the node is not shut down, the monitor service can be
        # asked for disk usage information.
        if status != constants.STATUS_SHUTDOWN:
            update_node_storage(node.ip, node.uuid)
def get_voi_data(self, data):
    """Collect CPU, memory, SSD-disk and management-NIC figures of the first node of type 1 or 3.

    The node is the first ``YzyNodes`` row whose ``type`` is 1 or 3. Its
    monitor service is queried for CPU, memory and disk information. Network
    I/O is sampled twice (before the other queries and after a 0.8 second
    pause) and the difference of the byte counters of the management NIC is
    reported.

    Disk totals only include storage records with ``type == 1`` whose path
    appears in the monitor's disk reply.

    Args:
        data: Request payload; not used by this method.

    Returns:
        dict: ``get_error_result("Success")`` extended with a ``data`` entry
        holding ``cpu_util``, ``memory_util``, ``disk_util`` and ``nic_util``;
        ``get_error_result("OtherError")`` when no matching node exists or
        any query fails.
    """
    logger.info("get node voi server data")
    node = db.session.query(YzyNodes).filter(YzyNodes.type.in_(
        [1, 3])).first()
    if node is None:
        # Without this guard the node.ip access below raises AttributeError.
        logger.error("get node voi server data fail %s",
                     "no node of type 1 or 3 found")
        return get_error_result("OtherError")

    req_data = {}
    req_cpu_url = '/api/v1/monitor/cpu'
    req_memory_url = '/api/v1/monitor/memory'
    req_disk_url = '/api/v1/monitor/disk'
    req_nic_url = '/api/v1/monitor/networkio'

    node_cpu_info = {}
    node_memory_info = {}
    node_disk_info = {}
    node_nic_info = {}
    try:
        # First network sample; kept inside the try so that a failing
        # monitor call is reported like every other query failure.
        old_nic_ret = monitor_post(node.ip, req_nic_url, req_data)

        cpu_ret = monitor_post(node.ip, req_cpu_url, req_data)
        memory_ret = monitor_post(node.ip, req_memory_url, req_data)
        cpu_data = cpu_ret['data']
        memory_data = memory_ret['data']
        node_cpu_info['numbers'] = cpu_data['numbers']
        node_cpu_info['utilization'] = cpu_data['utilization']
        # "free" is reported as total minus available.
        node_memory_info['free'] = (memory_data['total'] -
                                    memory_data['available'])
        node_memory_info['total'] = memory_data['total']
        node_memory_info['available'] = memory_data['available']
        node_memory_info['utilization'] = memory_data['utilization']

        disk_ret = monitor_post(node.ip, req_disk_url, req_data)
        disk_data = disk_ret["data"]
        storages = db_api.get_node_storage_all({'node_uuid': node.uuid})
        total = used = free = 0
        for storage in storages:
            # Storage type: 1 = SSD, 2 = SATA.
            if storage.type == 1 and storage.path in disk_data:
                logger.debug(storage.path)
                usage = disk_data[storage.path]
                total += usage["total"]
                used += usage["used"]
                free += usage["free"]
        node_disk_info['ratio'] = float(
            '%0.2f' % (used / total * 100)) if total else 0
        node_disk_info['total'] = total
        node_disk_info['used'] = used
        node_disk_info['free'] = free

        # Pause before the second network sample so the counters can move.
        time.sleep(0.8)
        nic_ret = monitor_post(node.ip, req_nic_url, req_data)
        manage_network_name = db_api.get_node_manage_nic_name(node.uuid)
        logger.debug(manage_network_name)
        if manage_network_name and manage_network_name in nic_ret["data"]:
            new_sample = nic_ret["data"][manage_network_name]
            old_sample = old_nic_ret["data"][manage_network_name]
            node_nic_info['bytes_send'] = (new_sample["bytes_send"] -
                                           old_sample["bytes_send"])
            node_nic_info['bytes_recv'] = (new_sample["bytes_recv"] -
                                           old_sample["bytes_recv"])
    except Exception as e:
        logger.error("get node voi server data fail %s", e, exc_info=True)
        return get_error_result("OtherError")

    resp = get_error_result("Success")
    resp['data'] = {
        "cpu_util": node_cpu_info,
        'memory_util': node_memory_info,
        'disk_util': node_disk_info,
        'nic_util': node_nic_info,
    }
    return resp