def _delete_pv(self, cluster_name, volume_name, mount_node_list, operator):
    kube_client = KubeClientMgr.instance().get_cluster_client(cluster_name)
    if kube_client is None:
        Log(1, 'DeleteStorageNodeWork.delete_pv get_cluster_client[%s]fail' % (cluster_name))
        return Result('', INVALID_PARAM_ERR, 'cluster_name is invalid', http.BAD_REQUEST)

    rlt = PVDB.instance().read_pv_info_by_volume_id(cluster_name, volume_name)
    if not rlt.success:
        Log(1, 'DeleteStorageNodeWork.delete_pv read_pv_info_by_volume_id[%s][%s]fail,as[%s]' % (cluster_name, volume_name, rlt.message))
        return self.delete_volume(cluster_name, volume_name, mount_node_list)

    pv_info = rlt.content
    rlt = self.delete_volume(cluster_name, volume_name, mount_node_list)
    if not rlt.success:
        Log(1, 'DeleteStorageNodeWork.delete_pv delete_volume[%s]fail,as[%s]' % (volume_name, rlt.message))
        return rlt

    pv_name = pv_info.get('pv_name')
    rlt = kube_client.delete_persistent_volume_claim(pv_info['workspace'], pv_name)
    if not rlt.success:
        Log(1, 'DeleteStorageNodeWork.delete_pv delete_persistent_volume_claim[%s]fail,as[%s]' % (pv_name, rlt.message))

    rlt = kube_client.delete_persistent_volume(pv_name)
    if not rlt.success:
        Log(1, 'DeleteStorageNodeWork.delete_pv delete_persistent_volume[%s]fail,as[%s]' % (pv_name, rlt.message))

    rlt = PVDB.instance().delete_volume(cluster_name, pv_name)
    if rlt.success:
        LogDel(3, operator, u'从集群[%s]删除容器卷[%s]' % (cluster_name, pv_name))
    else:
        Log(1, 'DeleteStorageNodeWork.delete_pv delete_volume[%s][%s] in etcd fail,as[%s]' % (cluster_name, pv_name, rlt.message))
    return rlt
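# ---------------------------------------------------------------------------
# Every handler in this file branches on rlt.success / rlt.result /
# rlt.message / rlt.content. A minimal sketch of the Result contract they
# appear to assume; the field names are inferred from the call sites above,
# not taken from the real Result class in this repo.
# ---------------------------------------------------------------------------
class SketchResult(object):
    """Hypothetical stand-in for the repo's Result type."""

    def __init__(self, content, result=0, msg='', code=200):
        self.content = content   # payload on success
        self.result = result     # 0 means OK, anything else is an error id
        self.message = msg       # human-readable error text
        self.code = code         # HTTP status surfaced to the caller

    @property
    def success(self):
        # every call site above branches on rlt.success
        return self.result == 0


# usage mirroring the call sites above
rlt = SketchResult({'pv_name': 'pv-demo'})
if rlt.success:
    print(rlt.content['pv_name'])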
def check_valid(self):
    """
    Validate the work item before it runs.
    """
    try:
        if not StorageNodeDB.instance().is_app_node_exist(self.cluster_name, self.ip):
            return Result('', INVALID_PARAM_ERR, 'mount host is invalid')

        if self.veclient is None:
            self.veclient = VespaceMgr.instance().get_cluster_client(self.cluster_name)

        if not self.veclient.test():
            return Result('', INIT_VESPACE_CLIENT_FAILERR, 'init vespace client fail.')

        kube_client = KubeClientMgr.instance().get_cluster_client(self.cluster_name)
        if kube_client is None:
            Log(1, 'AddPVWork.check_valid get_cluster_client[%s]fail' % (self.cluster_name))
            return Result('', INVALID_PARAM_ERR, 'cluster_name is invalid')
        self.kubeclient = kube_client

        if self.recovery_model not in ['Retain', 'Recycle', 'Delete']:
            self.recovery_model = 'Delete'

        # all checks passed
        return Result('')
    except InternalException as e:
        Log(1, "AddPVWork.check_valid except[%s]" % (e.value))
        return Result("AddPVWork", e.errid, e.value)
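# check_valid catches InternalException and reads e.errid / e.value; a
# minimal sketch of that exception shape (assumed for illustration, not the
# repo's actual definition):
class SketchInternalException(Exception):
    def __init__(self, errid, value):
        super(SketchInternalException, self).__init__(value)
        self.errid = errid   # numeric error id fed back into Result
        self.value = value   # message string used in the log line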
def create(self, data):
    """
    Create a clusterrole through the apiserver.
    :param data:
    :return:
    """
    content = data.get('content', {})
    # c_data = {"apiVersion": "rbac.authorization.k8s.io/v1", "kind": "ClusterRole",
    #           "metadata": {"name": data.get('name')}, "rules": content}
    name = content.get('metadata', {}).get('name')
    clu_name = data.get('cluster_name', '')
    if not all([name, clu_name]):
        return Result('', 400, 'param error', 400)

    rlt = KubeClientMgr.instance().create_clusterrole(clu_name, name, content)
    if not rlt.success:
        Log(1, "clusterroles create error:{}".format(rlt.message))
        return rlt

    # d_s = clusterrole(data)
    # save to etcd
    # rlt = ClusterRoledb.instance().save(name, d_s)
    # if not rlt.success:
    #     return rlt
    WebLog(3, u'创建', u"clusterrole[{}]".format(name), data.get('creater'))
    return Result('')
def delete_storage_class(self, cluster_name, info, operator):
    storage_class = info.get('storage_class_name')
    if info.get('status') == STORAGE_CLASS_STATUS_NOT_READY:
        Log(1, 'DeleteStorageNodeWork.delete_storage_class [%s][%s]fail,as[The storage class not ready]' % (cluster_name, storage_class))
        return self.delete_storage_class_from_db(storage_class)

    kube_client = KubeClientMgr.instance().get_cluster_client(cluster_name)
    if kube_client is None:
        Log(1, 'DeleteStorageNodeWork.delete_storage_class get_cluster_client[%s]fail' % (cluster_name))
        return Result('', INVALID_PARAM_ERR, 'cluster_name is invalid', http.BAD_REQUEST)

    rlt = kube_client.remove_storage_class_deploy(STORAGE_CLASS_DEFAULT_NAMESPACE, storage_class)
    if not rlt.success:
        Log(1, 'DeleteStorageNodeWork.delete_storage_class remove_storage_class_deploy[%s]fail,as[%s]' % (storage_class, rlt.message))
        return rlt

    mount_node_list = []
    rlt = MountDB.instance().read_mount_list(cluster_name, info.get('volume_name'))
    if not rlt.success:
        Log(1, 'DeleteStorageNodeWork.delete_storage_class read_mount_list[%s][%s]fail,as[%s]' % (cluster_name, info.get('volume_name'), rlt.message))
    else:
        mount_node_list = rlt.content

    rlt = self.delete_volume(cluster_name, info.get('volume_id'), mount_node_list)
    if not rlt.success:
        Log(1, 'DeleteStorageNodeWork.delete_storage_class delete_volume[%s]fail,as[%s]' % (storage_class, rlt.message))

    rlt = kube_client.delete_storage_class(storage_class)
    if not rlt.success:
        Log(1, 'DeleteStorageNodeWork.delete_storage_class delete_storage_class[%s]fail,as[%s]' % (storage_class, rlt.message))

    return self.delete_storage_class_from_db(storage_class)
def workspace_update(self, passport, w):
    """
    Update a workspace.
    :param passport:
    :param w:
    :return:
    """
    clu_name = w.get('cluster_name')
    ws_name = w.get('workspace_name')

    # permission check
    if passport.get('ring') != 'ring0':
        rlt = self.__check_permission(passport.get('username'), clu_name)
        if not rlt.success:
            return rlt
        if not rlt.content:
            return Result('', CLU_NOT_AUTH, 'not allowed', 400)

    old_workspace = WorkSpacedb.instance().read_workspace(ws_name)
    if old_workspace.success:
        if w.get('resource_cpu') < old_workspace.content.get('cpu', 0) \
                or w.get('resource_mem') < old_workspace.content.get('mem', 0):
            return Result('', msg='new value can not be less than the old value',
                          result=WORKSPACE_CHANGE_ERROR, code=400)
    else:
        return Result('', old_workspace.result, old_workspace.message, 400)

    # validate the resource quota
    check_res = self.check_resource(clu_name, w,
                                    old_workspace.content.get('cpu', 0),
                                    old_workspace.content.get('mem', 0))
    if not check_res.success:
        return check_res

    # update the namespace through the apiserver
    u_status = KubeClientMgr.instance().update_cluster_namespace(clu_name, ws_name, w)
    if not u_status.success:
        return Result('', u_status.result, u_status.message, 400)

    # update the record in etcd
    data = workspace_struce(w, passport.get('username'))
    WorkSpacedb.instance().update_workspace(ws_name, data)
    WebLog(3, u'更新', u"cluster[{}]的workspace[{}]".format(clu_name, ws_name), passport.get('username'))
    return Result('')
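# workspace_update refuses to shrink a workspace's quota. The comparison,
# isolated as a self-contained sketch; the key names resource_cpu/resource_mem
# and cpu/mem are taken from the handler above:
def is_downgrade(new, old):
    """True if the requested quota is lower than the currently stored one."""
    return (new.get('resource_cpu') < old.get('cpu', 0) or
            new.get('resource_mem') < old.get('mem', 0))

assert is_downgrade({'resource_cpu': 1, 'resource_mem': 4}, {'cpu': 2, 'mem': 4})
assert not is_downgrade({'resource_cpu': 2, 'resource_mem': 4}, {'cpu': 2, 'mem': 4})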
def workspace_create(self, passport, w):
    """
    Create a workspace.
    :param passport:
    :param w:
    :return:
    """
    clu_name = w.get('cluster_name')
    ws_name = w.get('workspace_name')

    # permission check
    if passport.get('ring') != 'ring0':
        rlt = self.__check_permission(passport.get('username'), clu_name)
        if not rlt.success:
            return rlt
        if not rlt.content:
            return Result('', CLU_NOT_AUTH, 'not allowed', 400)

    # parameter check
    check_param = self.check_param(w)
    if not check_param.success:
        return Result('', check_param.result, check_param.message, check_param.code)

    # validate the resource quota
    check_res = self.check_resource(clu_name, w)
    if not check_res.success:
        return Result('', check_res.result, check_res.message, 400)

    # create the namespace through the apiserver
    k = KubeClientMgr.instance().create_cluster_namespace(clu_name, ws_name, w)
    if not k.success:
        return Result('', k.result, k.message, 400)

    # save the workspace record
    data = workspace_struce(w, passport.get('username'))
    WebLog(3, u'创建', u"cluster[{}]的workspace[{}]".format(clu_name, ws_name), passport.get('username'))
    return WorkSpacedb.instance().save_workspace(ws_name, data)
def pod_pause_id(self, **kwargs):
    """
    Get the id of a pod's pause container; used to fetch the pod's network
    metrics when monitoring containers.
    :param host_ip:
    :param container_id:
    :return:
    """
    try:
        host_ip = kwargs.get('host_ip')
        container_id = kwargs.get('container_id')
        if container_id:
            # containerID arrives as 'docker://<id>'; strip the scheme prefix
            container_id = container_id.split('//')[1] if '//' in container_id else container_id
            return KubeClientMgr.instance().get_pause_id(host_ip, container_id)
        else:
            return Result('', 400, 'param error', 400)
    except Exception as e:
        PrintStack()
        Log(1, "pod_pause_id error:{}".format(e.message))
        return Result('', 500, '', 500)
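# pod_pause_id (and clu_pods below) normalize Kubernetes containerID values
# such as 'docker://abc123' to the bare id. The same normalization as a
# standalone sketch:
def strip_container_scheme(container_id):
    """Return the bare container id, dropping a 'docker://'-style prefix."""
    return container_id.split('//')[1] if '//' in container_id else container_id

assert strip_container_scheme('docker://abc123') == 'abc123'
assert strip_container_scheme('abc123') == 'abc123'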
def create(self, data):
    """
    Create a clusterrolebinding through the apiserver.
    :param data:
    :return:
    """
    content = data.get('content', {})
    name = content.get('metadata', {}).get('name')
    clu_name = data.get('cluster_name', '')
    if not all([name, clu_name]):
        return Result('', 400, 'param error', 400)

    rlt = KubeClientMgr.instance().create_clusterrolebinding(clu_name, name, content)
    if not rlt.success:
        Log(1, "clusterrolebinding create error:{}".format(rlt.message))
        return rlt

    # d_s = clusterrole(data)
    # save to etcd
    # rlt = ClusterRoledb.instance().save(name, d_s)
    # if not rlt.success:
    #     return rlt
    WebLog(3, u'创建', u"clusterrolebinding[{}]".format(name), data.get('creater'))
    return Result('')
def delete(self, workspace, conf_name, username):
    """
    Delete a configmap.
    :param workspace:
    :param conf_name:
    :param username:
    :return:
    """
    rlt = ConfigMapdb.instance().read_configmap(workspace, conf_name)
    if not rlt.success:
        if rlt.result == ETCD_KEY_NOT_FOUND_ERR:
            return Result('', 400, 'the configmap[{}] not existed'.format(conf_name), 400)
        return rlt

    # delete the record from etcd; a missing key counts as already deleted
    rlt = ConfigMapdb.instance().del_configmap(workspace, conf_name)
    if not rlt.success and rlt.result != ETCD_KEY_NOT_FOUND_ERR:
        return rlt
    WebLog(3, u'删除', u"workspace[{}]下的configmap[{}]".format(workspace, conf_name), username)
    self.reload(1)

    # delete the configmap through the apiserver
    rlt = KubeClientMgr.instance().delete_configmap(workspace, conf_name)
    if not rlt.success:
        Log(1, "configmap delete error:{}".format(rlt.message))
        # if rlt.code == 404 or rlt.result == FAIL or rlt.result == ETCD_KEY_NOT_FOUND_ERR:
        #     pass
        # else:
        return rlt
    return Result('')
def delete(self, cluster_name, name):
    return KubeClientMgr.instance().delete_clusterrolebinding(cluster_name, name)
def list(self, cluster_name):
    return KubeClientMgr.instance().list_clusterrolebinding(cluster_name)
def creat_configmap(self, data):
    """
    Create a configmap.
    :param data:
    :return:
    """
    # check whether this name+version already exists under the workspace
    if ConfigMapdb.instance().is_existed(data.get('workspace'), data.get('name') + data.get('version')):
        return Result('', CONFIGMAP_EXISTED, 'is existed', 400)

    # check that the workspace exists
    rlt = WorkSpacedb.instance().read_all_gws()
    if not rlt.success:
        return rlt
    group_info = rlt.content.get(data.get('group'), [])
    if data.get('workspace') not in group_info:
        return Result('', 400, 'the workspace not in the group', 400)

    # parse the content as JSON first, then fall back to YAML
    # (safe_load avoids executing arbitrary YAML tags)
    try:
        content = json.loads(data.get('content'))
        Log(4, "content1:{}".format(content))
    except ValueError:
        content = yaml.safe_load(data.get('content'))
        Log(4, "content2:{}".format(content))
    except Exception as e:
        return Result('', 400, str(e.message), 400)

    c_data = {
        "apiVersion": "v1",
        "kind": "ConfigMap",
        "metadata": {
            "name": data.get('name') + data.get('version')
        },
        "data": content
    }
    Log(4, 'content:{}'.format(data.get('content', '')))
    rlt = KubeClientMgr.instance().create_configmap(data.get('workspace'), c_data)
    if not rlt.success:
        Log(3, "create_configmap error:{}".format(rlt.message))
        return rlt

    # save the record to etcd
    data['conf_keys'] = content.keys()
    con = configmap_struct(data)
    rlt = ConfigMapdb.instance().save_configmap(data.get('workspace'), data.get('name') + data.get('version'), con)
    if not rlt.success:
        return rlt

    WebLog(3, u'创建', u"configmap[{}]".format(data.get('name', '') + data.get('version', '')), data.get('creater'))
    self.reload(1)
    return Result('')
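# creat_configmap accepts the configmap body as either JSON or YAML. The
# parse-with-fallback step as a self-contained sketch (json is stdlib, yaml
# is the public PyYAML module; the helper name is illustrative):
import json
import yaml

def parse_configmap_content(text):
    """Parse text as JSON first, then as YAML; raise ValueError if neither works."""
    try:
        return json.loads(text)
    except ValueError:
        pass
    try:
        return yaml.safe_load(text)  # safe_load avoids executing YAML tags
    except yaml.YAMLError as e:
        raise ValueError('content is neither JSON nor YAML: %s' % e)

# both forms yield the same dict
assert parse_configmap_content('{"k": "v"}') == {'k': 'v'}
assert parse_configmap_content('k: v') == {'k': 'v'}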
def timeout(self):
    try:
        Log(3, "syndata #timeout start at:{}".format(datetime.datetime.now()))
        if not CluNodedb.instance().ismaster():
            Log(3, "syndata this node is not master")
            return

        # do not add new tasks while the queue still has pending ones
        if self.task_queue.qsize():
            Log(3, "syndata timeout task_queue.qsize:{},".format(self.task_queue.qsize()))
            return

        clu_apply_num = {}
        rlt1 = Clusterdb.instance().read_clu_map()
        if not rlt1.success:
            Log(1, "syndata timeout rlt1.message:{}".format(rlt1.message))
            return
        for c, v in rlt1.content.items():
            sp_key = c.split('/')
            if sp_key[-1] == 'apply_num':
                clu_apply_num[sp_key[-2]] = v

        clu_node = {}
        rlt = CluNodedb.instance().read_clunode_map(pass_nll_value=False)
        if not rlt.success:
            return Result('', rlt.result, rlt.message, 500)
        for k, v in rlt.content.items():
            sp_key = k.split('/')
            if len(sp_key) == 6:
                clu_node.setdefault(sp_key[4], {})[sp_key[5]] = v
            if len(sp_key) == 5:
                clu_node.setdefault(sp_key[4], {})

        # per-cluster workspace map, also used when syncing apply_num
        clu_ws = WorkSpacedb.instance().read_gws()
        if not clu_ws.success:
            Log(3, "syndata timeout ws :{}".format(clu_ws.message))
            return

        for clu_name, nodes in clu_node.items():
            # only sync clusters that have hosts
            if not nodes:
                continue

            # update the k8s cluster vip
            # self.syn_vip(clu_name)

            # sync the cluster's application count
            syn_clu = self.syn_apply_num(clu_name, clu_ws.content.get(clu_name, []), clu_apply_num.get(clu_name, 0))
            if not syn_clu.success:
                Log(1, "syndata clu_info apply_num error:{}".format(syn_clu.message))

            # if the apiserver cannot be reached the cluster is abnormal (it may
            # be newly added, or may have failed after being added), so the host
            # status has to be updated
            rlt = KubeClientMgr.instance().get_cluster_client(clu_name)
            if not rlt.success:
                Log(3, "rlt.message:{}".format(rlt.message))
                # skip the check task while the cluster is still pending
                if rlt.result == CLU_IS_PENDING:
                    return

            check_num = 3
            while check_num:
                Log(3, "check_num:{}, clu_name:{}, node:{}".format(check_num, clu_name, len(nodes)))
                rlt = KubeClientMgr.instance().get_cluster_client(clu_name)
                if rlt.success:
                    break
                check_num -= 1
                time.sleep(0.5)
            if check_num == 0:
                self.change_node_status(nodes, {'status': 'error', 'message': rlt.message, 'pod_num': 0})
                continue

            client = rlt.content
            gws = clu_ws.content.get(clu_name, [])
            ws_list = []
            for i in gws:
                ws_list.extend(i.get('workspace', []))

            for n in nodes.values():
                if not n['name']:
                    rlt = LauncherClient.instance().get_host_name(clu_name, n['type'] + 's', n['ip'])
                    if not rlt.success:
                        continue
                    host_name = rlt.content
                    ip_name = n['ip'].replace('.', '-')
                    rlt = CluNodedb.instance().update_node(clu_name, ip_name, {'name': host_name})
                    if not rlt.success:
                        Log(1, "update_node error:{}".format(rlt.message))
                    n['name'] = host_name
                task = SynDataFac(clu_name, n, client, ws_list)
                self.create_task(task)

        Log(3, "syndata create task all done at:{}".format(datetime.datetime.now()))
        return True
    except Exception as e:
        Log(1, "syndata error:{}".format(e.message))
        PrintStack()
        return None
    except KeyboardInterrupt:
        Log(3, "syndata timeout be killed")
        return None
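# timeout polls get_cluster_client up to three times before flagging every
# node in the cluster as error. The same bounded-retry shape as a generic,
# self-contained sketch (the helper name and signature are illustrative,
# not from this repo):
import time

def retry(fn, attempts=3, delay=0.5):
    """Call fn() until it returns a truthy value or attempts run out."""
    result = None
    for _ in range(attempts):
        result = fn()
        if result:
            return result
        time.sleep(delay)
    return result  # last (falsy) result, mirroring the check_num == 0 branch

# usage: retry(lambda: KubeClientMgr.instance().get_cluster_client(name))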
def workspace_delete(self, workspace_name, workspacegroup_name, cluster_name, passport):
    """
    Delete a workspace.
    :param workspace_name:
    :param workspacegroup_name:
    :return:
    """
    # permission check
    if passport.get('ring') != 'ring0':
        rlt = self.__check_permission(passport.get('username'), cluster_name)
        if not rlt.success:
            return rlt
        if not rlt.content:
            return Result('', CLU_NOT_AUTH, 'not allowed', 400)

    # deleting applications is not needed here: the deploy module detects the
    # removal by itself, so there is no explicit delete-apply call

    # delete the namespace through the apiserver
    rlt = KubeClientMgr.instance().delete_cluster_namespace(cluster_name, workspace_name)
    if not rlt.success:
        Log(1, "kubeclient delete workspace:{} error:{}".format(workspace_name, rlt.message))
        return rlt

    # delete the subnet assigned to the workspace
    rlt = NetworkMgr.instance().get_subnet_by_ws(workspace_name)
    if rlt.success:
        data = rlt.content
        if data:
            NetworkMgr.instance().del_subnet_ws({
                "cluster_name": cluster_name,
                'fa_ip': data.get('fa_ip'),
                'key': data.get('key')
            })
    else:
        Log(1, "networkmgr get_subnet_by_ws error:{}".format(rlt.message))

    # delete the workspace's configmap records from etcd
    rlt = ConfigMapdb.instance().del_by_ws(workspace_name)
    if not rlt.success:
        if rlt.result != ETCD_KEY_NOT_FOUND_ERR:
            Log(1, "workspace delete configmap error:{}".format(rlt.message))

    # delete the workspace record from etcd
    rlt = WorkSpacedb.instance().delete_workspace(workspace_name)
    if not rlt.success:
        Log(1, "workspacedb delete workspace:{} error:{}".format(workspace_name, rlt.message))

    WebLog(3, u'删除', u"cluster[{}]的workspace[{}]".format(cluster_name, workspace_name), passport.get('username'))
    return Result('')
if not group:  # guard condition inferred from the log message below
    Log(1, 'deletegroup fail,as[group name is invalid]')
    return Result('', PARAME_IS_INVALID_ERR, 'group invalid')

StorageMgr.instance().delete_group_storage_class(group, operator)
ws = WorkSpacedb.instance().get_ws_by_group(group)
if not ws.success:
    Log(1, 'deletegroup get_ws_by_group fail,as[%s]' % (ws.message))
    return Result('ok')

g_d = {}
for ns in ws.content:
    g_d.setdefault(ns['cluster_name'], []).append(ns['name'])

for cluster_name, workspace_list in g_d.items():
    client = KubeClientMgr.instance().get_cluster_client(cluster_name)
    if client:
        for workspace in workspace_list:
            # delete the pvc
            StorageMgr.instance().delete_workspace_pv(cluster_name, workspace, operator)
            # delete the workspace through the apiserver
            client.delete_namespace(workspace)
            # delete the workspace record from etcd
            WorkSpacedb.instance().delete_workspace(workspace)

# delete the member under the cluster in etcd
# http://192.168.14.9:8881/v1/usergroup/b/user
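# the deletegroup fragment above groups workspaces by cluster before
# cascading the delete; that grouping step in isolation, as a sketch:
def group_by_cluster(workspaces):
    """Map cluster_name -> list of workspace names."""
    g = {}
    for ns in workspaces:
        g.setdefault(ns['cluster_name'], []).append(ns['name'])
    return g

assert group_by_cluster([{'cluster_name': 'c1', 'name': 'w1'},
                         {'cluster_name': 'c1', 'name': 'w2'}]) == {'c1': ['w1', 'w2']}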
def __init__(self):
    self.clumgr = ClusterManageMgr()
    self.kubemgr = KubeClientMgr.instance()
    print 'cluster start....'
def clu_pods(self):
    # cadvisor monitoring data for every container on every host of the
    # cluster: all_containers = {'<container_id>': {...}, ...}
    all_containers = dict()
    tasks = []
    data_queue = Queue.Queue()
    num = 0
    for node in self.nodes:
        one = threading.Thread(target=self.one_node_cadvisor, args=(node.get('ip'), data_queue))
        tasks.append(one)
        num += 1

    t1 = time.time()
    for t in tasks:
        t.start()
    for t in tasks:
        t.join()

    while True:
        if num == 0:
            break
        try:
            rlt = data_queue.get(timeout=2)
        except Queue.Empty:
            # a worker thread produced nothing; stop waiting
            break
        if rlt:
            num -= 1
            if not rlt.success:
                continue
            for k, v in rlt.content.items():
                c_id = v.get('id')
                if c_id:
                    all_containers[c_id] = v
        else:
            time.sleep(0.01)
    Log(3, "clu_pods all cost:{}".format(time.time() - t1))

    # fetch all Ready pods of the cluster
    rlt = KubeClientMgr.instance().all_pods(self.clu_name)
    if not rlt.success:
        Log(1, "get all_pods error:{}".format(rlt.message))
        return
    Log(3, "all_pods... cluster:{}, len(pods):{}".format(self.clu_name, len(rlt.content)))

    deploy_pods = {}
    for pod in rlt.content:
        time.sleep(0.001)
        ns = pod.get('metadata', {}).get('namespace')
        name = pod.get('metadata', {}).get('annotations', {}).get('com.appsoar.ufleet.deploy', '')
        d_p = '%s-%s' % (ns, name)
        dep = self.deploys.get(d_p)
        if dep:
            Log(4, 'found a deployment:{}, pod:{}'.format(d_p, pod.get('metadata').get('name')))
            pod_rep = {'resource': pod.get('spec', {}).get('containers', [])}
            for i in pod.get('status', {}).get('containerStatuses', []):
                container_id = i.get('containerID', '')
                if not container_id:
                    Log(4, "not found the containerID from pod:{}".format(pod.get('metadata').get('name')))
                    continue
                container_id = container_id.split('//')[1] if '//' in container_id else container_id
                c = all_containers.pop(container_id, {})
                if c:
                    pause_id = c.get('labels', {}).get('io.kubernetes.sandbox.id', '')
                    pod_rep.setdefault('container_data', []).append(c)
                    pause_data = all_containers.get(pause_id, {})
                    if pause_data:
                        pod_rep.setdefault('pause_data', pause_data)
                    else:
                        Log(2, "not found the pause_data for sandbox:{}".format(pause_id))
                else:
                    Log(2, "not found the container:{}, host:{}".format(container_id, pod.get('status').get('hostIP')))
            deploy_pods.setdefault(d_p, {}).setdefault('pods_list', []).append(pod_rep)
            deploy_pods.setdefault(d_p, {}).setdefault('hpa', dep)
    Log(3, "deploy_pods all cost:{}, cluster:{}".format(time.time() - t1, self.clu_name))
    return deploy_pods
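# clu_pods fans one thread out per node and drains their results from a
# shared Queue. A self-contained sketch of that scatter/gather pattern
# (the fetcher payloads here are fabricated for illustration):
import threading
try:
    import Queue            # Python 2
except ImportError:
    import queue as Queue   # Python 3

def gather(ips, fetch):
    """Run fetch(ip) in one thread per ip and collect all results."""
    q = Queue.Queue()
    threads = [threading.Thread(target=lambda ip=ip: q.put(fetch(ip))) for ip in ips]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    results = []
    while not q.empty():
        results.append(q.get())
    return results

# usage with a stub fetcher
print(gather(['10.0.0.1', '10.0.0.2'], lambda ip: {'ip': ip, 'containers': {}}))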