def do_enable_sysdb_node(self, ip):
    """Add node ``ip`` to the sysdb (etcd) cluster and start its service.

    Steps:
      1. read the current member IPs with ``get_sysdb_nodes``;
      2. register the new member through ``etcdctl member add``;
      3. send an RPC to ``ip`` asking it to start its sysdb service,
         passing the comma-separated member IPs with the new node last.

    Returns a ``(code, message)`` tuple: ``(0, "success")`` when every
    step succeeded, ``(-1, <detail>)`` otherwise.
    """
    (status, members) = self.get_sysdb_nodes()
    if status != 0:
        return (-1, "get enable sysdb node: get nodes failed")

    add_cmd = (
        "/usr/bin/etcdctl --endpoints=%s member add %s "
        "--peer-urls=http://%s:2380"
        % (self._get_sysdb_endpoints(),
           self._get_sysdb_instance_name(ip),
           ip))
    (status, output) = sysdb.execute(add_cmd)
    if status != 0:
        self.log.syslog(syslog.LOG_ERR, "do add member failed:%s" % output)
        return (-1, output)

    # Existing members first, the new node's ip last.
    all_ips = ",".join(list(members) + [ip])

    (status, output) = rpc.RpcClient.send_message(
        ip, "mod", "misc.sysdb.sysdb_mgt.sys_enable_sysdb_node_service",
        [all_ips])

    # NOTE(review): eval() executes whatever text the peer returned. If the
    # reply is always a plain literal such as "(0, 'success')",
    # ast.literal_eval would be the safer parser -- confirm the RPC format.
    started = status == 0 and eval(output)[0] == 0
    if not started:
        self.log.syslog(
            syslog.LOG_ERR,
            "do start new node sysdb service failed:%s,%s" % (ip, output))
        return (-1, "start new node sysdb service failed:%s, %d, %s"
                % (ip, status, output))

    return (0, "success")
def get_sysdb_nodes(self):
    """Return the IPs of the sysdb (etcd) cluster members.

    Runs ``etcdctl member list`` up to ``MEMBER_READY_RETRY_TIMES`` times
    and stops at the first attempt whose exit status is 0. For every output
    line whose last comma-separated field starts with ``http://``, the host
    part of that URL is collected.

    Returns ``(0, [ip, ...])`` on success and ``(-1, [])`` when every
    attempt failed.
    """
    nodes = []
    # Pre-set so the failure log below cannot hit an unbound name when the
    # retry loop runs zero times.
    output = ""
    is_ready = False

    for attempt in range(1, MEMBER_READY_RETRY_TIMES + 1):
        endpoints = self._get_sysdb_endpoints()
        cmd = "/usr/bin/etcdctl --endpoints=%s member list" % endpoints
        (status, output) = sysdb.execute(cmd)
        if status == 0:
            is_ready = True
            break
        # The message is built from adjacent literals: a backslash
        # continuation inside the string would leak source indentation
        # into the log text.
        self.log.syslog(
            syslog.LOG_ERR,
            "get sysdb member info retry:%d, endpoint:%s, "
            "output:%s" % (attempt, endpoints, output))

    if not is_ready:
        self.log.syslog(syslog.LOG_ERR,
                        "get sysdb member info failed:%s" % output)
        return (-1, nodes)

    for line in output.split('\n'):
        last_field = line.split(',')[-1].strip()
        if last_field.startswith("http://"):
            # "http://<host>:<port>" -> "<host>"
            nodes.append(last_field.split(':')[1][2:])
    return (0, nodes)
def health(self):
    """Report whether the sysdb (etcd) cluster is healthy.

    Queries ``etcdctl endpoint health`` against port 2379 of every member
    returned by ``get_sysdb_nodes``. The cluster counts as healthy as soon
    as one output line contains ``is healthy``.

    Returns ``(code, flag)`` where ``flag`` is 1 (healthy) or 0 (unhealthy):
      * ``(-1, 0)`` when the member list is unavailable or empty;
      * ``(0, 0)``  when the health command fails or no line is healthy;
      * ``(0, 1)``  when at least one endpoint is healthy.
    """
    unhealthy, healthy = 0, 1

    (status, nodes) = self.get_sysdb_nodes()
    if status != 0:
        return (-1, unhealthy)

    endpoint_list = ",".join(node + ":2379" for node in nodes)
    if not endpoint_list:
        return (-1, unhealthy)

    health_cmd = ("/usr/bin/etcdctl --endpoints=%s endpoint health"
                  % endpoint_list)
    (status, output) = sysdb.execute(health_cmd)
    if status != 0:
        self.log.syslog(syslog.LOG_ERR, "get sysdb health failed")
        return (0, unhealthy)

    for line in output.split('\n'):
        # One healthy endpoint is enough to call the cluster healthy.
        if re.search(r"is healthy", line.strip()):
            return (0, healthy)
    return (0, unhealthy)
def is_sysdb_master_node(self):
    """Tell whether the local host is the sysdb (etcd) master node.

    Runs ``etcdctl endpoint status`` against port 2379 of every member
    returned by ``get_sysdb_nodes`` and looks at the first output line
    that contains ``true``. The text before the first ``:`` on that line
    is taken as the endpoint ip, and the result is whether
    ``net.is_local_ip`` accepts it.

    Returns ``False`` when the member list is unavailable or empty, when
    the status command fails, or when no line contains ``true``.
    """
    (status, nodes) = self.get_sysdb_nodes()
    if status != 0:
        return False

    end_points = ",".join(node + ":2379" for node in nodes)
    if not end_points:
        return False

    cmd = "/usr/bin/etcdctl --endpoints=%s endpoint status" % end_points
    (status, output) = sysdb.execute(cmd)
    if status != 0:
        # This is the status query, not the health query.
        self.log.syslog(syslog.LOG_ERR, "get sysdb endpoint status failed")
        return False

    for line in output.split('\n'):
        # NOTE(review): "true" is presumably the "is leader" column of the
        # status output -- confirm against the deployed etcdctl version.
        if re.search(r"true", line.strip()):
            ip = line.split(':')[0]
            # Only the first matching line decides the answer.
            return bool(net.is_local_ip(ip.strip()))
    return False
def _get_sysdb_node_member_id(self, ip):
    """Return the etcd member id of the sysdb node ``ip``.

    Runs ``etcdctl member list`` and returns the first comma-separated
    field of the first line that mentions ``ip``. Returns ``""`` when the
    command fails or no line matches.
    """
    cmd = ("/usr/bin/etcdctl --endpoints=%s member list"
           % self._get_sysdb_endpoints())
    (status, output) = sysdb.execute(cmd)
    if status != 0:
        # Do not try to parse error text as a member table.
        self.log.syslog(syslog.LOG_ERR,
                        "get sysdb member id failed:%s, %s" % (ip, output))
        return ""

    # Match the address literally and as a whole token: unescaped dots are
    # regex wildcards, and an unanchored "10.0.0.1" would also match the
    # line of "10.0.0.10" and return the wrong member id.
    ip_pattern = re.compile(r"(?<![\d.])%s(?![\d.])" % re.escape(ip))
    for line in output.split('\n'):
        if ip_pattern.search(line):
            return line.split(',')[0].strip()
    return ""
def do_disable_sysdb_node(self, ip):
    """Remove node ``ip`` from the sysdb (etcd) cluster and stop its service.

    Steps:
      1. read the current member IPs with ``get_sysdb_nodes``;
      2. refuse when exactly one member is listed;
      3. return success straight away when ``ip`` is not a member;
      4. look up the member id and run ``etcdctl member remove``;
      5. send an RPC to ``ip`` asking it to stop its sysdb service,
         passing the comma-separated IPs of the remaining members.

    Returns a ``(code, message)`` tuple: ``(0, "success")`` on success or
    when there was nothing to do, ``(-1, <detail>)`` on failure.
    """
    (status, members) = self.get_sysdb_nodes()
    if status != 0:
        return (-1, "get sysdb node info failed")
    if len(members) == 1:
        return (-1, "there should be have one sysdb node")
    if ip not in members:
        # Not a sysdb node: nothing to disable.
        return (0, 'success')

    member_id = self._get_sysdb_node_member_id(ip)
    if not member_id:
        return (-1, "get sysdb member id failed:%s" % ip)

    remove_cmd = ("/usr/bin/etcdctl --endpoints=%s member remove %s"
                  % (self._get_sysdb_endpoints(), member_id))
    (status, output) = sysdb.execute(remove_cmd)
    if status != 0:
        return (-1, "disable sysdb failed:%d, %s" % (status, output))

    remaining = ",".join(
        node for node in members if node != ip).rstrip(',')

    (status, output) = rpc.RpcClient.send_message(
        ip, "mod", "misc.sysdb.sysdb_mgt.sys_disable_sysdb_node_service",
        [remaining])

    # NOTE(review): eval() executes whatever text the peer returned. If the
    # reply is always a plain literal such as "(0, 'success')",
    # ast.literal_eval would be the safer parser -- confirm the RPC format.
    stopped = status == 0 and eval(output)[0] == 0
    if not stopped:
        detail = "disable sysdb node service failed:%s" % output
        self.log.syslog(syslog.LOG_ERR, detail)
        return (-1, detail)

    self.log.syslog(syslog.LOG_INFO, "disable sysdb node %s success" % ip)
    return (0, "success")
def info(self):
    """Collect the health state of the sysdb cluster and of each member.

    Runs ``etcdctl endpoint health`` against port 2379 of every member
    returned by ``get_sysdb_nodes`` and builds a mapping with:
      * ``"cluster"``: ``"healthy"`` if at least one member is healthy,
        otherwise ``"unhealthy"``;
      * one entry per member that appears in the command output, keyed by
        its hostname from ``ClsNode().list_system_node()`` (or by its ip
        when no hostname is found), with value ``"healthy"``,
        ``"unhealthy"`` or ``"unknown"``.

    Returns ``(0, sorted(mapping.items()))`` on success. On failure it
    returns ``(-1, {"cluster": "unhealthy"})`` -- note that the payload is
    a dict in that case, not a sorted list.
    """
    value = {"cluster": "unhealthy"}

    (status, nodes) = self.get_sysdb_nodes()
    if status != 0:
        return (-1, value)
    end_points = ",".join(node + ":2379" for node in nodes)

    # System node list, used to translate member ips into hostnames.
    handle = ClsNode()
    (status, node_info) = handle.list_system_node()
    if status != 0:
        # NOTE(review): an older comment here said this failure is ignored,
        # but the code has always returned an error; behavior kept.
        self.log.syslog(syslog.LOG_ERR, "get sytsem node info failed")
        return (-1, value)

    cmd = "/usr/bin/etcdctl --endpoints=%s endpoint health" % end_points
    (status, output) = sysdb.execute(cmd)
    if status != 0:
        self.log.syslog(syslog.LOG_ERR, "get sysdb health failed")
        return (-1, value)

    # Strip once instead of once per node.
    lines = [line.strip() for line in output.split('\n')]

    for node in nodes:
        # Match "<ip>:" rather than the bare ip, so that "10.0.0.1" does
        # not pick up the line that belongs to "10.0.0.10".
        prefix = node + ":"
        report = None
        for line in lines:
            if line.startswith(prefix):
                report = line
                break
        if report is None:
            # No line for this member: it gets no entry in the result.
            continue

        if "is healthy" in report:
            state = "healthy"
            value['cluster'] = "healthy"
        elif "is unhealthy" in report:
            state = "unhealthy"
        else:
            # Unrecognized line format; should not normally happen.
            self.log.syslog(syslog.LOG_ERR,
                            "get sysdb health unknown:%s, %s"
                            % (node, report))
            state = "unknown"

        # Translate the ip to a hostname; the last matching entry wins.
        node_host = ""
        if node_info:
            for host_item in node_info['nodes']:
                if host_item['ip'] == node:
                    node_host = host_item['hostname']

        if not node_host:
            self.log.syslog(syslog.LOG_ERR,
                            "get sysdb health node hostname failed:%s"
                            % node)
            value[node] = state
        else:
            value[node_host] = state

    return (0, sorted(value.items()))