def syn_vip(self, clu_name):
    """
    Synchronise the VIP stored for a cluster with the one the launcher reports.

    The launcher's cluster info is fetched and parsed, its ``vip`` field is
    compared with the value read back from ``Clusterdb``, and the stored
    value is overwritten when the two differ.  Every failure path is logged
    and ends the call without saving anything.

    :param clu_name: name of the cluster to synchronise
    :return: None
    """
    info_rlt = LauncherClient.instance().get_cluster_info(clu_name)
    if not info_rlt.success:
        Log(1, "syn_vip get_cluster_info error:{}".format(info_rlt.message))
        return

    cluster_info, parsed = self.parse_json(info_rlt.content.get('info'))
    if not parsed:
        Log(1, "syn_vip cluster_info can not parse to json:{}".format(info_rlt.content.get('info')))
        return
    launcher_vip = cluster_info.get('vip')

    vip_rlt = Clusterdb.instance().get_vip(clu_name)
    if vip_rlt.success:
        stored_vip, parsed = self.parse_json(vip_rlt.content)
        if not parsed:
            Log(1, "syn_vip the info can not parse to json:{}".format(vip_rlt.content))
            return
    elif vip_rlt.result == ETCD_KEY_NOT_FOUND_ERR:
        # Nothing stored yet: compare against an empty value.
        stored_vip = ''
    else:
        Log(1, "syn_vip can get vip error:{}".format(vip_rlt.message))
        return

    # NOTE(review): the value written below is {'vip': ...} while the
    # comparison uses the bare vip; confirm that get_vip() yields the bare
    # value, otherwise this condition is always true.
    if launcher_vip != stored_vip:
        Clusterdb.instance().save_vip(clu_name, {'vip': launcher_vip})
    return
def get_api(self, cluster_name):
    """
    Connect to a cluster and hand back its API object.

    The cluster's auth info is requested from the launcher and used to build
    a ``KubeClient``, which is then connected.

    :param cluster_name: name of the cluster to connect to
    :return: the launcher's failed result when auth info cannot be read;
             ``Result(client.api)`` when the connection succeeds;
             otherwise ``Result('', 500, <connect message>)``
    """
    auth_rlt = LauncherClient.instance().get_cluster_auth_info(cluster_name)
    if not auth_rlt.success:
        return auth_rlt

    kube_client = KubeClient(auth_rlt.content)
    conn_rlt = kube_client.connect()
    if not conn_rlt.success:
        Log(
            3,
            'KubeClientMgr get_cluste_client [%s]fail, as[%s]' %
            (cluster_name, conn_rlt.message))
        return Result('', 500, conn_rlt.message)

    return Result(kube_client.api)
def load_cluster(self, cluster_name):
    """
    Load the client for a single cluster.

    :param cluster_name: name of the cluster whose client should be loaded
    :return: the result of ``add_cluster_client`` when the auth info was read;
             a ``CLU_IS_PENDING`` result when the launcher reports the
             cluster as pending; otherwise the launcher's failed result
    """
    auth_rlt = LauncherClient.instance().get_cluster_auth_info(cluster_name)

    if auth_rlt.success:
        Log(4, "load_cluster, cluster_name:{}".format(cluster_name))
        return self.add_cluster_client(cluster_name, auth_rlt.content)

    # A pending cluster is reported with its own result, without an error log.
    if auth_rlt.result == CLU_IS_PENDING:
        return Result('', CLU_IS_PENDING, 'clu master is pending')

    Log(
        1,
        'KubeClientMgr.load_cluster read_cluster_auth_info[%s]fail,as[%s]' %
        (cluster_name, auth_rlt.message))
    return auth_rlt
def timeout(self):
    """
    Timer tick: queue one ``SynDataFac`` task per node of every cluster.

    Steps performed, in order:

    1. Do nothing unless this node is the master and the task queue is empty.
    2. Read the per-cluster ``apply_num`` values, the cluster/node map and
       the workspace groups from their stores.
    3. For each cluster that has nodes: refresh its application count, get
       its client (retrying up to three more times), and queue a sync task
       for every node.  Nodes without a name get their host name looked up
       and stored first.

    A cluster reported as ``CLU_IS_PENDING`` is skipped for this tick.  A
    cluster whose client still cannot be obtained after the retries has all
    of its nodes marked with status ``error``.

    :return: ``True`` when every cluster was processed; ``None`` on any
             early exit or caught exception; a failed ``Result`` when the
             cluster/node map cannot be read
    """
    try:
        Log(3, "syndata #timeout start at:{}".format(datetime.datetime.now()))
        if not CluNodedb.instance().ismaster():
            Log(3, "syndata this node is not master")
            return

        # Do not add new tasks while earlier ones are still queued.
        if self.task_queue.qsize():
            Log(3, "syndata timeout task_queue.qsize:{},".format(self.task_queue.qsize()))
            return

        # Collect apply_num per cluster; the cluster name is the path
        # segment right before the trailing 'apply_num'.
        clu_apply_num = {}
        rlt1 = Clusterdb.instance().read_clu_map()
        if not rlt1.success:
            Log(1, "syndata timeout rlt1.message:{}".format(rlt1.message))
            return
        for c, v in rlt1.content.items():
            sp_key = c.split('/')
            if sp_key[-1] == 'apply_num':
                clu_apply_num[sp_key[-2]] = v

        # Build {cluster: {node_key: node_data}} from the flat key map.
        clu_node = {}
        rlt = CluNodedb.instance().read_clunode_map(pass_nll_value=False)
        if not rlt.success:
            Log(1, "syndata timeout read_clunode_map error:{}".format(rlt.message))
            # Kept as a Result (not None) so existing callers see the same value.
            return Result('', rlt.result, rlt.message, 500)
        for k, v in rlt.content.items():
            sp_key = k.split('/')
            if len(sp_key) == 6:
                clu_node.setdefault(sp_key[4], {})[sp_key[5]] = v
            if len(sp_key) == 5:
                # Cluster-level key: register the cluster even without nodes.
                clu_node.setdefault(sp_key[4], {})

        # Workspace groups, used for the apply_num sync and the node tasks.
        clu_ws = WorkSpacedb.instance().read_gws()
        if not clu_ws.success:
            Log(3, "syndata timeout ws :{}".format(clu_ws.message))
            return

        for clu_name, nodes in clu_node.items():
            # Only clusters that have hosts are synchronised.
            if not nodes:
                continue

            gws = clu_ws.content.get(clu_name, [])

            # VIP synchronisation is currently disabled: self.syn_vip(clu_name)

            # Refresh the number of applications of the cluster.
            syn_clu = self.syn_apply_num(clu_name, gws, clu_apply_num.get(clu_name, 0))
            if not syn_clu.success:
                Log(1, "syndata clu_info apply_num error:{}".format(syn_clu.message))

            # An unreachable apiserver means the cluster is abnormal (just
            # added, or broken after being added): its hosts must be
            # flagged accordingly.
            rlt = KubeClientMgr.instance().get_cluster_client(clu_name)
            if not rlt.success:
                Log(3, "rlt.message:{}".format(rlt.message))
                # A pending cluster is not checked; move on to the next
                # cluster instead of aborting the whole tick.
                if rlt.result == CLU_IS_PENDING:
                    continue
                check_num = 3
                while check_num:
                    Log(3, "check_num:{}, clu_name:{}, node:{}".format(check_num, clu_name, len(nodes)))
                    rlt = KubeClientMgr.instance().get_cluster_client(clu_name)
                    if rlt.success:
                        break
                    check_num -= 1
                    # No point in waiting after the final attempt.
                    if check_num:
                        time.sleep(0.5)
                if check_num == 0:
                    self.change_node_status(nodes, {'status': 'error', 'message': rlt.message, 'pod_num': 0})
                    continue

            client = rlt.content
            ws_list = []
            for i in gws:
                ws_list.extend(i.get('workspace', []))

            for n in nodes.values():
                if not n['name']:
                    # Host name not recorded yet: ask the launcher and store it.
                    rlt = LauncherClient.instance().get_host_name(clu_name, n['type'] + 's', n['ip'])
                    if not rlt.success:
                        continue
                    host_name = rlt.content
                    ip_name = n['ip'].replace('.', '-')
                    rlt = CluNodedb.instance().update_node(clu_name, ip_name, {'name': host_name})
                    if not rlt.success:
                        Log(1, "update_node error:{}".format(rlt.message))
                    n['name'] = host_name
                task = SynDataFac(clu_name, n, client, ws_list)
                self.create_task(task)

        Log(3, "syndata create task all done at:{}".format(datetime.datetime.now()))
        return True
    except Exception as e:
        # Format the exception itself: `.message` is not a reliable attribute.
        Log(1, "sysdata error:{}".format(e))
        PrintStack()
        return None
    except KeyboardInterrupt:
        Log(3, "syndata timeout be killed")
        return None
def create_new_cluster(self, cluster_info, passport):
    """
    Create a cluster record, optionally importing an existing cluster.

    When ``cluster_info['create_way']`` is ``'add'`` the method connects to
    the given apiserver, keeps the client in the local store, and collects
    one node record and one master-node record for every ``InternalIP``
    address reported by the cluster's nodes.  In all cases it then registers
    the auth info with the launcher, writes the cluster, and saves the
    collected records.

    :param cluster_info: dict describing the cluster; keys read here are
        ``addr``, ``cluster_name``, ``create_way``, ``cacerts``,
        ``apiservercerts``, ``apiserverkey`` and ``creater``
    :param passport: mapping whose ``licensed`` entry must be truthy
    :return: ``Result('')`` on success, otherwise a failed ``Result``
        (either built here or returned by the failing call)
    """
    master_ip = cluster_info.get('addr', '').split(':')[0]
    host_name = master_ip.replace('.', '-')

    # License check.
    if not passport.get('licensed', ''):
        return Result('', LICENSE_OUT_OF_DATE, 'licensed is out of date', 400)

    # Refuse to create a cluster whose name is already taken.
    cluster_name = cluster_info.get('cluster_name', '')
    if Clusterdb.instance().clu_is_exist(cluster_name):
        return Result(0, CLUSTER_HAS_EXISTED, 'clu is existed', 400)

    masternode_list = []
    clunode_list = []
    if cluster_info.get('create_way', '') == 'add':
        # Refuse a master IP that is already registered for this cluster.
        if CluNodedb.instance().is_node_exist(cluster_name, host_name):
            return Result(0, msg='', result=CLUSTER_HAS_EXISTED, code=400)

        # Refuse hosts that ufleet itself runs on.
        ufleet_hosts = GetSysConfig('ufleet_hosts').split(',')
        if master_ip in ufleet_hosts:
            return Result('', msg='the host is used by ufleet.', result=NODE_USED_BY_UFLEET, code=400)

        # NOTE(review): the key is spelled 'cluser_name'; confirm this is
        # the spelling KubeClient reads before changing it.
        client = KubeClient({
            'auth_data': cluster_info.get('cacerts', ''),
            'server': 'https://' + cluster_info.get('addr', ''),
            'cert_data': cluster_info.get('apiservercerts'),
            'client_key': cluster_info.get('apiserverkey'),
            'cluser_name': cluster_name
        })
        rlt = client.connect()
        if not rlt.success:
            Log(
                3,
                'KubeClientMgr.add_cluster[%s]fail, as[%s]' %
                (cluster_name, rlt.message))
            return rlt
        self.__store[cluster_name] = client

        rlt = client.get_all_nodes()
        if not rlt.success:
            return rlt

        creater = cluster_info.get('creater')
        for node in rlt.content:
            for addr in node.get('status', {}).get('addresses', []):
                if addr.get('type', '') != 'InternalIP':
                    continue
                ip = addr.get('address')
                if not ip:
                    # Address entry without a value: nothing to register.
                    continue

                # The node whose IP matches the given address is the master.
                host_type = 'master' if ip == master_ip else 'node'
                ip_name = ip.replace('.', '-')

                # Cluster-node record.
                node_data = node_struct(cluster_name, ip, host_type, creater)
                node_data = self.syn_nodeinfo(node_data, node, [])
                clunode_list.append({
                    'cluster_name': cluster_name,
                    'data': node_data
                })

                # Master-node directory record.
                masternode_data = masternode_struct(
                    creater, cluster_name, host_type, ip,
                    '', '', '', '', '', '')
                masternode_list.append({
                    'master_ip': ip_name,
                    'data': masternode_data
                })

    # Register the cluster's auth info with the launcher.
    auth_data = auth_info_struct(cluster_info)
    rlt = LauncherClient.instance().load_cluster(auth_data)
    if not rlt.success:
        return Result('', 500, 'load_cluster error:' + rlt.message, 500)

    # Persist the cluster and the collected records.
    new_clu = clu_struct(cluster_info)
    rlt = Clusterdb.instance().create_cluster_full(cluster_name, new_clu)
    if not rlt.success:
        return Result('', rlt.result, rlt.message, 400)

    for entry in clunode_list:
        rlt = CluNodedb.instance().save_node(entry['cluster_name'], entry['data'])
        if not rlt.success:
            return rlt

    for entry in masternode_list:
        rlt = Masterdb.instance().save_master(entry['master_ip'], entry['data'])
        if not rlt.success:
            return rlt

    return Result('')