def get_config(self, key=None):
    """Return a configuration value looked up by flat or dotted key.

    :param key: None/empty returns the whole config dict; a plain key does a
                direct lookup; a dotted key ("a.b.c") walks nested dicts.
    :return: the value (a deep copy for dotted lookups, protecting self.conf),
             or None when any path segment is missing.
    """
    if not key:
        return self.conf
    if "." not in key:
        if key in self.conf:
            return self.conf[key]
        return None
    # Dotted lookup: walk a deep copy so callers cannot mutate self.conf
    # through a returned sub-dict.
    config = copy.deepcopy(self.conf)
    for part in key.split("."):
        # Guard: an intermediate value may not be a dict (original code
        # crashed with TypeError on e.g. list/int intermediates).
        if isinstance(config, dict) and part in config:
            config = config[part]
        else:
            Log(4, "invalid key %s on %s" % (part, key))
            return None
    return config
def launch_task(self):
    """Run the workbench's save_storage_class_info step and report its Result.

    :return: the workbench Result on completion, or an error Result when an
             exception is raised.
    """
    Log(4, "SaveStorageClassInfoTask.launch_task")
    try:
        rlt = self.workbench.save_storage_class_info()
        if rlt.success:
            self.log("save_storage_class_info success.")
        else:
            self.log("save_storage_class_info fail. as[%s]" % (rlt.message))
        return rlt
    except InternalException as ex:
        self.log(
            "SaveStorageClassInfoTask save_storage_class_info fail,as[%s]"
            % (ex.value), ex.errid)
        return Result(
            'InternalException', ex.errid,
            "SaveStorageClassInfoTask launch_task fail,as[%s]" % (ex.value))
    except Exception as e:
        # Consistency fix: the identical method on SaveStorageClassInfoTask
        # also catches generic exceptions so the task framework always gets
        # a Result instead of an unhandled crash.
        PrintStack()
        self.log("launch_task except[%s]" % (str(e)))
        Log(1, "SaveStorageClassInfoTask launch_task fail,as[%s]" % (str(e)))
        return Result(
            self._id, 1,
            "SaveStorageClassInfoTask launch_task fail,as[%s]" % (str(e)))
def list(self, **kwargs):
    """Fetch the cluster list.

    :param kwargs: offset, limit, passport (carries 'ring' userid and username)
    :return: Result from the cluster manager, or an error Result on exception.
    """
    try:
        passport = kwargs.get('passport', {})
        return self.clumgr.get_cluster_list(
            limit=kwargs.get('limit', None),
            offset=int(kwargs.get('offset', 0)),
            userid=passport.get('ring'),
            username=passport.get('username'))
    except Exception as e:
        PrintStack()
        Log(1, "cluster list,error:{}".format(e))
        return Result('', INTERNAL_EXCEPT_ERR, 'server error')
def delete_record_by_name(self, workspace, app_name, record_name):
    """Delete the monitor record matching app_name (preferred) or record_name
    within a workspace.

    :return: Result of the delete, or a not-exist error Result.
    """
    rlt = self.read_list(key_id='id')
    if not rlt.success:
        Log(1, 'MonitorMgr.delete_record_by_name read_all fail,as[%s]' % (rlt.message))
        return rlt
    # The two original branches differed only in which field they matched.
    field, wanted = ('name', app_name) if app_name else ('record_name', record_name)
    for record in rlt.content:
        if record.get(field) == wanted and record.get('workspace') == workspace:
            return self.delete_record(record['id'])
    return Result('', ETCD_RECORD_NOT_EXIST_ERR, 'The record not exist')
def load_data(self, sub_dir=None):
    """Build a {sub_key: key} mapping from a two-level etcd directory.

    :param sub_dir: optional sub-directory under self.root_path.
    :return: dict mapping each second-level key to its first-level parent;
             {} when the root listing fails (failed child listings are skipped).
    """
    if sub_dir is None:
        root_path = self.root_path
    else:
        root_path = '%s/%s' % (self.root_path, sub_dir)
    rlt = self.etcd.read_key_list(root_path)
    if not rlt.success:
        Log(1, 'ETCDMgr.load data[%s] fail,as[%s]' % (root_path, rlt.message))
        return {}
    data = {}
    for key in rlt.content:
        result = self.etcd.read_key_list('%s/%s' % (root_path, key))
        if result.success:
            for sub_key in result.content:
                data[sub_key] = key
    # Bug fix: original ended with `return result(data)`, calling the last
    # Result object (TypeError, or NameError on an empty listing). Return the
    # accumulated dict, matching the {} failure return above.
    return data
def delete_master_by_name(self, workspace, app_name, master_name):
    """Delete the master record matching app_name (preferred) or master_name
    within a workspace.

    :return: Result of the delete, or a not-exist error Result.
    """
    rlt = self.read_list(key_id='id')
    if not rlt.success:
        Log(1, 'MasterMgr.delete_master_by_name read_all fail,as[%s]' % (rlt.message))
        return rlt
    # The two original branches differed only in which field they matched.
    field, wanted = ('name', app_name) if app_name else ('master_name', master_name)
    for master in rlt.content:
        if master.get(field) == wanted and master.get('workspace') == workspace:
            return self.delete_master(master['id'])
    return Result('', ETCD_RECORD_NOT_EXIST_ERR, 'The master not exist')
def parse_token(self, token_str):
    """Resolve a token string to a passport dict, caching hits for 60 seconds.

    :param token_str: token (or list of tokens — first one is used);
                      empty/None yields an anonymous 'ring8' passport.
    :return: passport dict, or the falsy result of _parse_token on failure.
    """
    if not token_str:
        Log(3, 'Unauthorized visit.')
        return {'ring': 'ring8'}
    if isinstance(token_str, list):
        token_str = token_str[0]
    cached = self.__store.get(token_str, None)
    if cached:
        return cached['passport']
    passport = self._parse_token(token_str)
    if passport:
        # Cache with a 60-second expiry timestamp.
        self.__store[token_str] = {'passport': passport,
                                   'expire': NowMilli() + 60000}
    return passport
def add_list_value(self, path, list_data):
    """Append list_data to the JSON-encoded list stored at an etcd path.

    :param path: etcd key whose value is a JSON list
    :param list_data: list of items to append
    :return: Result('') on success, error Result otherwise.
    """
    try:
        node = self.client.read(path)
    except etcd.EtcdConnectionFailed:
        return Result(0, ETCD_CONNECT_FAIL_ERR, 'Etcd Connection Failed.')
    except etcd.EtcdKeyNotFound:
        return Result(0, ETCD_KEY_NOT_FOUND_ERR, 'The path[%s]not exist.' % (path))
    Log(4, "add_list_value:{}".format(node.value))
    current = json.loads(node.value)
    node.value = json.dumps(current + list_data)
    if self.client.update(node):
        return Result('')
    return Result('', ETCD_UPDATE_FAIL_ERR, 'update_list_value fail.')
def timeout_before(self):
    """Index every deployment across all namespaces and schedule one task per
    cluster that has nodes."""
    all_deploys = DeployClient.instance().all_deploy()
    if not all_deploys:
        return
    Log(4, 'all_deploys:{}'.format(all_deploys))
    deploys = {}
    for deploy in all_deploys:
        ns = deploy.get('workspace')
        name = deploy.get('name')
        if ns and name:
            # key deployments by "<namespace>-<name>"
            deploys.setdefault('%s-%s' % (ns, name), deploy)
    clu_nodes = self.get_clu_nodes()
    if not clu_nodes:
        return
    for clu_name, nodes in clu_nodes.items():
        if deploys and nodes:
            self.create_task(clu_name, nodes, deploys)
def get_all_nodes(self):
    """Fetch the successfully-added hosts from the API server.

    :return: Result(list of node items), or an error Result on failure.
    """
    url = '/nodes'
    try:
        resp = self.client.request(method='GET', url=url, timeout=self.timeout)
    except Exception as e:
        Log(3, 'get_apinodes except{}'.format(e))
        return Result('', msg='get_apinodes except{}'.format(e), result=500)
    if resp.status_code != 200:
        return Result('', FAIL, resp.text)
    return Result(resp.json().get('items', []))
def set_label(self, post_data, **kwargs):
    """Set labels on a host: add or remove labels.

    :param post_data: JSON string with cluster_name, host_name, labels
    :param kwargs: passport carries the acting username
    :return: Result from the cluster manager, or an invalid-JSON error Result.
    """
    try:
        # Bug fix: original did post_data.replace("'", "\'"), which is a
        # no-op ("\'" == "'"). Dropped. NOTE(review): if the intent was to
        # coerce single-quoted pseudo-JSON, that needs a real transformation
        # — confirm with callers.
        data = json.loads(post_data)
    except Exception as e:
        Log(1, "set_label.parse data to json fail.input[%s]" % (post_data))
        return Result('', INVALID_JSON_DATA_ERR, str(e))
    cluster_name = data.get('cluster_name', '')
    host_name = data.get('host_name', '')
    labels = data.get('labels', {})
    username = kwargs.get('passport', {}).get('username', 'unknown')
    return self.clumgr.set_label(cluster_name=cluster_name,
                                 host_name=host_name,
                                 labels=labels,
                                 username=username)
def check_valid(self):
    """Validate preconditions before deleting a storage cluster.

    :return: an error Result when a check fails; None (implicitly) when valid.
    """
    try:
        if self.client is None:
            self.client = VespaceMgr.instance().get_cluster_client(self.cluster_name)
        client_ready = self.client and self.client.test()
        if not client_ready:
            return Result('', INIT_VESPACE_CLIENT_FAILERR, 'init vespace client fail.')
        # Refuse to delete the last remaining cluster.
        if StoregeClusterDB.instance().get_cluster_num(self.cluster_name) <= 1:
            return Result('', LAST_CLUSTER_CANNOT_DELETE_ERR,
                          'The last cluster can not be delete')
    except InternalException as e:
        Log(1, "DeleteStorageWork.check_valid except[%s]" % (e.value))
        return Result("DeleteStorageWork", e.errid, e.value)
def delete_cluster(self, **kwargs):
    """Delete a k8s storage cluster; the manager refuses when resources remain
    or it is the last cluster.

    :param kwargs: cluster_name, passport (carries username)
    :return: Result from the cluster manager, or a 400 Result on bad input.
    """
    cluster_name = kwargs.get('cluster_name', None)
    username = kwargs.get('passport', {}).get('username', 'unknown')
    if cluster_name is None:
        return Result(0, msg='request error', code=400)
    rlt = self.clumgr.delete_cluster(cluster_name=cluster_name, username=username)
    if not rlt.success:
        Log(1, 'delete_cluster[%s][%s] fail,as[%s]' % (username, cluster_name, rlt.message))
        return rlt
    # Cascade: drop the cluster's storage registration as well.
    StorageMgr.instance().delete_storage_cluster(cluster_name)
    return rlt
def pod_pause_id(self, **kwargs):
    """Return a pod's pause-container id, used when reading its network stats.

    :param kwargs: host_ip, container_id (may carry a runtime prefix like
                   "docker://<id>")
    :return: Result from KubeClientMgr, or an error Result.
    """
    try:
        host_ip = kwargs.get('host_ip')
        container_id = kwargs.get('container_id')
        if not container_id:
            return Result('', 400, 'param error', 400)
        # Strip the runtime scheme prefix if present.
        if '//' in container_id:
            container_id = container_id.split('//')[1]
        return KubeClientMgr.instance().get_pause_id(host_ip, container_id)
    except Exception as e:
        PrintStack()
        Log(1, "pod_pause id error:{}".format(e.message))
        return Result('', 500, '', 500)
def remove_master(self, **kwargs):
    """Remove a master host from a cluster and drop its storage registration.

    :param kwargs: cluster_name; master_name (dashed ip form, e.g. '10-0-0-1');
                   name (real host name); passport (carries username)
    :return: Result from the cluster manager, or a 400 Result on bad input.
    """
    cluster_name = kwargs.get('cluster_name', None)
    passport = kwargs.get('passport', {})
    master_name = kwargs.get('master_name', None)
    # Bug fix: master_name defaults to None, so the .replace below raised
    # AttributeError when absent. Validate input like delete_cluster does.
    if not cluster_name or not master_name:
        return Result(0, msg='request error', code=400)
    host_ip = master_name.replace('-', '.')
    host_real_name = kwargs.get('name', '')
    rlt = self.clumgr.remove_master(cluster_name, host_ip, host_real_name, passport)
    if rlt.success:
        StorageMgr.instance().delete_storage_cluster(cluster_name)
    else:
        Log(1, 'remove_master[%s][%s][%s] fail,as[%s]' % (
            passport.get('username'), master_name, cluster_name, rlt.message))
    return rlt
def check_valid(self):
    """Validate preconditions before adding a storage node.

    :return: an error Result when a check fails; None (implicitly) when valid.
    """
    try:
        if self.client is None:
            self.client = VespaceMgr.instance().get_cluster_client(self.cluster_name)
        client_ready = self.client and self.client.test()
        if not client_ready:
            return Result('', INIT_VESPACE_CLIENT_FAILERR, 'init vespace client fail.')
        # Refuse duplicate node registrations.
        if StorageNodeDB.instance().is_node_exist(self.cluster_name, self.ip):
            return Result('', STORAGE_NODE_EXIST_ALREADY_ERR, 'The node is added.')
    except InternalException as e:
        Log(1, "AddStorageNodeWork.check_valid except[%s]" % (e.value))
        return Result("AddStorageNodeWork", e.errid, e.value)
def get_cluster_client(self, cluster_name):
    """Return (and cache) the apiserver client of a single storage cluster.

    :return: client object, or None when lookup/loading fails.
    """
    self.reload()
    if cluster_name in self.__store:
        return self.__store[cluster_name]
    rlt = StoregeClusterDB.instance().get_cluster_info(cluster_name)
    if not rlt.success:
        Log(1, "VespaceMgr.get_cluster_client get_cluster_info fail as[%s]" % (rlt.message))
        return None
    client = self.load_cluster(rlt.content)
    if not client:
        return None
    # Cache the freshly-loaded client for subsequent calls.
    self.__store[cluster_name] = client
    return client
def find_all_Ethernet_interface(self):
    """Return the host's NIC names from /host/proc/net/dev.

    Skips container and loopback interfaces (lines containing 'docker',
    'veth' or 'lo:'). Returns [] when the stats file is missing.
    """
    dev_file = '/host/proc/net/dev'
    if not os.path.isfile(dev_file):
        return []
    ethernet_list = []
    skip_markers = ('docker', 'veth', 'lo:')
    with open(dev_file, 'r') as fp:
        for line in fp:
            if any(line.find(marker) >= 0 for marker in skip_markers):
                continue
            if line.find(':') >= 0:
                # Interface name is everything before the first colon.
                name = line[:line.index(':')].strip()
                ethernet_list.append(name)
                Log(4, "find a network iface[%s]" % (name))
    return ethernet_list
def _get_disk_info(self, cluster_name, node_info):
    """Merge a storage host's disk list with its engine device assignments.

    Each disk gains an 'added' flag and, when matched, the engine role
    ('data' or 'cache') plus the device attributes.
    :return: Result(disk list), or the failed lookup Result.
    """
    client = VespaceMgr.instance().get_cluster_client(cluster_name)
    rlt = client.get_storage_host_info(node_info['cluster_id'],
                                       node_info['domain_name'],
                                       node_info['ip'],
                                       node_info['store_api_port'])
    if not rlt.success:
        Log(1, 'Storage.get_storage_host_info [%s][%s]fail,as[%s]' % (
            node_info['cluster'], node_info['ip'], rlt.message))
        return rlt
    disk_list = (rlt.content.get('Disk') or {}).get('Dbi', [])
    engine = rlt.content.get('Engine')
    if isinstance(engine, dict):
        data_devs = engine.get('DataDev') or []
        cache_devs = engine.get('CacheDev') or []
    else:
        data_devs = []
        cache_devs = []
    # Index devices by path, tagging which engine each belongs to.
    device_map = {}
    for role, devices in (('data', data_devs), ('cache', cache_devs)):
        for device in devices:
            device['engine'] = role
            device_map[device['Device']] = device
    for disk in disk_list:
        if disk['Path'] in device_map:
            disk.update(device_map[disk['Path']])
            disk['added'] = True
        else:
            disk['added'] = False
    return Result(disk_list)
def create_sshclient(self):
    """Build a paramiko SSH client and connect to the host.

    Uses the supplied private key when present, otherwise password auth.
    :return: Result('') on success, a 400 error Result on connect failure.
    """
    self.sshclient = paramiko.SSHClient()
    private_key = None
    if self.prikey:
        # The key is held in memory, never written to disk.
        key_file = StringIO.StringIO(self.prikey)
        private_key = paramiko.RSAKey.from_private_key(key_file, self.prikeypwd)
        key_file.close()
    self.sshclient.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        self.sshclient.connect(self.host_ip,
                               self.port,
                               username=self.username,
                               password=self.passwd,
                               pkey=private_key,
                               timeout=4)
    except Exception as e:
        Log(1, 'remoteparam connect error:{}'.format(e.message))
        return Result('', 400, 'connect host error:' + e.message)
    return Result('')
def get_net_data(self, host_ip, pause_id):
    """Collect per-timestamp network interface stats of a pause container.

    :param host_ip: host running cadvisor
    :param pause_id: pause container id
    :return: list of {'timestamp', 'net'} dicts; [] on failure.
    """
    samples = []
    rlt = Cadvisor(host_ip).get(pause_id)
    if not rlt.success:
        Log(1, "get net data from cadvisor error:{}".format(rlt.message))
        return samples
    net_data = rlt.content
    if net_data and isinstance(net_data, dict):
        # cadvisor keys the payload by container path; take the single entry.
        stats = net_data.values()[0].get('stats', [])
        for entry in stats:
            samples.append({
                'timestamp': entry.get('timestamp', ''),
                'net': entry.get('network', {}).get('interfaces', [])
            })
    return samples
def subnet_workspace(self, cluster_name):
    """Return the workspaces on a cluster that can still be assigned a subnet.

    :param cluster_name:
    :return: Result(list of workspace names not yet bound to an ippool).
    """
    # All workspaces on the cluster.
    rlt = WorkSpacedb.instance().get_ns_by_cluster(cluster_name)
    if not rlt.success:
        return rlt
    candidates = [item['name'] for item in rlt.content]
    # Workspaces already bound to an ippool.
    rlt = NetworkMgr.instance().get_ippool_clu(cluster_name, 0)
    if not rlt.success:
        Log(1, "workspace subnet_worksapce error:{}".format(rlt.message))
        return rlt
    for pool in rlt.content:
        assigned = pool['workspace']
        if assigned in candidates:
            candidates.remove(assigned)
    return Result(candidates)
def workspce_remain(self, cluster_name):
    """Compute a cluster's remaining allocatable cpu/memory.

    Sums resources of 'running' nodes, caps the total at 80%, then subtracts
    what existing workspaces already claim.
    :return: Result({'cpu_remain', 'mem_remain'}), or an error Result.
    """
    rlt = CluNodedb.instance().read_node_list(cluster_name)
    if not rlt.success:
        return rlt
    Log(4, "workspace_remain:{}".format(rlt.content))
    total_cpu = 0
    total_mem = 0
    for node in rlt.content:
        if node.get('status', '') != 'running':
            continue
        if node.get('cpu', ''):
            total_cpu += int(node.get('cpu', 0))
        if node.get('memory', ''):
            # Memory strings carry a two-char unit suffix — assumed, TODO confirm.
            total_mem += float(node.get('memory', 0)[:-2])
    # Resources already claimed by workspaces.
    used_cpu = 0
    used_mem = 0
    rlt = WorkSpacedb.instance().clu_used(cluster_name)
    if rlt.success:
        for ws in rlt.content:
            used_cpu += ws.get('cpu', 0)
            used_mem += float(ws.get('mem', 0))
    elif rlt.result != ETCD_KEY_NOT_FOUND_ERR:
        return Result('', msg=rlt.message, result=500, code=500)
    return Result({
        'cpu_remain': round(total_cpu * 0.8 - used_cpu, 2),
        'mem_remain': round(total_mem * 0.8 - used_mem, 3)
    })
class SaveStorageClassInfoTask(SubTask):
    """Sub-task that persists storage-class information via its workbench."""

    def __init__(self, task_info, workbench):
        super(SaveStorageClassInfoTask, self).__init__(
            task_info, SAVE_STORAGE_CLASS_TASK_SUFFIX)
        self.task_type = SAVE_STORAGE_CLASS_TASK
        self.index = SAVE_STORAGE_CLASS_INDEX
        # Relative weight of this sub-task in overall progress.
        self.weight = 0.8
        self.workbench = workbench

    def launch_task(self):
        """Run save_storage_class_info, converting failures into Results."""
        Log(4, "SaveStorageClassInfoTask.launch_task")
        try:
            rlt = self.workbench.save_storage_class_info()
            if rlt.success:
                self.log("save_storage_class_info success.")
            else:
                self.log("save_storage_class_info fail. as[%s]" % (rlt.message))
            return rlt
        except InternalException as ex:
            self.log(
                "SaveStorageClassInfoTask save_storage_class_info fail,as[%s]"
                % (ex.value), ex.errid)
            return Result(
                'InternalException', ex.errid,
                "SaveStorageClassInfoTask launch_task fail,as[%s]" % (ex.value))
        except Exception as e:
            PrintStack()
            self.log("launch_task except[%s]" % (str(e)))
            Log(1, "SaveStorageClassInfoTask launch_task fail,as[%s]" % (str(e)))
            return Result(
                self._id, 1,
                "SaveStorageClassInfoTask launch_task fail,as[%s]" % (str(e)))
def read_list(self, key_prefix, **args):
    """Read every entry under key_prefix and return them as a list of dicts.

    Optional args: sort_order / sort_target forwarded to the prefix read;
    suffix keeps only keys ending with it; skip_suffix drops keys ending with
    it; key_id names the field holding the key remainder (default ID).
    :return: Result(list of dicts).
    """
    rlt = self.safe_get_prefix(key_prefix,
                               args.get('sort_order'),
                               args.get('sort_target', 'key'))
    if not rlt.success:
        Log(1, 'read_list[{}] fail,as[{}]'.format(key_prefix, rlt.message))
        return rlt
    # +1 skips the path separator following the prefix.
    prefix_len = len(key_prefix) + 1
    suffix = args.get('suffix', '')
    skip = args.get('skip_suffix', '')
    key_id = args.get('key_id', ID)
    records = []
    for value, meta in rlt.content:
        if 0 == len(value):
            continue
        if suffix and meta.key[-len(suffix):] != suffix:
            continue
        if skip and meta.key[-len(skip):] == skip:
            continue
        key_rest = meta.key[prefix_len:]
        is_json, value = self._parse_2_json(value)
        if is_json:
            value[key_id] = key_rest
            records.append(value)
        else:
            records.append({key_id: key_rest, 'value': value})
    return Result(records)
def create(self, data):
    """Create a clusterrolebinding through the cluster's apiserver.

    :param data: dict carrying 'cluster_name' and a k8s 'content' manifest
                 (whose metadata.name is the binding name)
    :return: Result('') on success, error Result otherwise.
    """
    # Bug fix: default was '' and ''.get(...) raised AttributeError when
    # 'content' was missing; fall back to an empty dict instead.
    content = data.get('content') or {}
    name = content.get('metadata', {}).get('name')
    clu_name = data.get('cluster_name', '')
    if not all([name, clu_name]):
        return Result('', 400, 'param error', 400)
    rlt = KubeClientMgr.instance().create_clusterrolebinding(
        clu_name, name, content)
    if not rlt.success:
        Log(1, "clusterrolebinding create error:{}".format(rlt.message))
        return rlt
    # NOTE(review): the second format() argument has no matching placeholder
    # and is silently ignored — confirm whether the creater was meant to
    # appear in the audit message.
    WebLog(3, u'创建', "clusterrolebinding[{}]".format(name, data.get('creater')))
    return Result('')
def delete(self, workspace, conf_name, username):
    """Delete a configmap: remove it from etcd, then from the apiserver.

    :param workspace: owning workspace
    :param conf_name: configmap name
    :param username: acting user (for the audit log)
    :return: Result('') on success, error Result otherwise.
    """
    rlt = ConfigMapdb.instance().read_configmap(workspace, conf_name)
    if not rlt.success:
        if rlt.result == ETCD_KEY_NOT_FOUND_ERR:
            return Result(
                '', 400, 'the configmap[{}] not existed'.format(conf_name), 400)
        return rlt
    # Remove from etcd first.
    rlt = ConfigMapdb.instance().del_configmap(workspace, conf_name)
    if not rlt.success and rlt.result != ETCD_KEY_NOT_FOUND_ERR:
        return rlt
    # Bug fix (NOTE: reconstructed — original emitted the deletion audit log
    # and cache reload on the etcd-delete *failure* branch and never on
    # success; confirm intended flow with the module owner).
    WebLog(3, u'删除',
           u"workspace[{}]下的configmap[{}]".format(workspace, conf_name),
           username)
    self.reload(1)
    # Then delete the configmap through the apiserver.
    rlt = KubeClientMgr.instance().delete_configmap(workspace, conf_name)
    if not rlt.success:
        Log(1, "configmap delete get kubeclient error:{}".format(rlt.message))
        return rlt
    return Result('')
def load_cluster(self, clu_info):
    """Take over (adopt) a cluster via the launcher service.

    :param clu_info: cluster description dict, posted as JSON
    :return: Response wrapper on completion, or an error Result when the
             HTTP request itself fails.
    """
    url = self.laucher_url + '/load'
    payload = json.dumps(clu_info)
    try:
        r = requests.post(url,
                          payload,
                          headers={"content-type": "application/json"},
                          timeout=self.timeout)
    except requests.exceptions.RequestException as e:
        # Bug fix: the error message wrongly said 'create_node'.
        return Result('',
                      msg='load_cluster except:{}'.format(e),
                      result=500,
                      code=500)
    # Bug fix: log message typo 'laod' -> 'load'.
    Log(3, "load cluster url:{}, data:{}".format(url, payload))
    return Response(r)
def events(self, **kwargs):
    """Fetch all events of a group/workspace within an optional time window.

    :param kwargs: group, workspace, start_time, end_time, offset, limit
    :return: Result from the cluster manager, or a 500 error Result.
    """
    try:
        group = kwargs.get('group', '')
        workspace = kwargs.get('workspace', '')
        start_time = kwargs.get('start_time', '')
        end_time = kwargs.get('end_time', '')
        offset = kwargs.get('offset', '1')
        limit = kwargs.get('limit', '10')
        # Paging values arrive as strings; convert the non-empty ones.
        offset = int(offset) if offset else offset
        limit = int(limit) if limit else limit
        return self.clumgr.get_ws_events(group, workspace, start_time,
                                         end_time, offset, limit)
    except Exception as e:
        PrintStack()
        Log(1, "get events error:{}".format(e.message))
        return Result('', 500, '', 500)
def timeout(self):
    """Periodic elastic check: only the master node, and only when the task
    queue is empty, runs the scheduling pass."""
    Log(3, "elastic timeout start at:{}".format(datetime.datetime.now()))
    if not Clusterdb.instance().ismaster():
        Log(3, "elastic this node is not master")
        return
    if self.task_queue.qsize():
        Log(3, "elastic the queue is not None. queue size:{}".format(
            self.task_queue.qsize()))
        return
    try:
        self.timeout_before()
        Log(3, "elastic create task done at:{}".format(datetime.datetime.now()))
    except KeyboardInterrupt:
        # KeyboardInterrupt is not an Exception subclass, so its own clause
        # is required to observe a shutdown signal here.
        Log(3, "elastic killed")
    except Exception as e:
        PrintStack()
        Log(3, "elastic timeout:{}".format(e.message))
    return None