Exemple #1
0
class RemoveContainerHandler(APIHandler):
    """Handle POST requests asking for a container to be removed."""

    container_opers = Container_Opers()

    @asynchronous
    def post(self):
        """Destroy the container named by the ``containerName`` argument.

        Raises HTTPAPIError (status 400) when ``containerName`` is missing.
        When the container does not exist, nothing is destroyed and a
        "not exist" status is sent back instead.
        """
        arguments = self.get_all_arguments()
        logging.info('all_arguments: %s' % str(arguments))

        name = arguments.get('containerName')
        if not name:
            raise HTTPAPIError(
                status_code=400,
                error_detail="no container_name argument!",
                notification="direct",
                log_message="no container_name argument!",
                response="please check params!")

        if self.container_opers.check_container_exists(name):
            self.container_opers.destroy(name)
            # The reply only acknowledges the request; the caller is told
            # to check the result afterwards.
            self.finish({
                "message": "remove container has been done but need some time, please wait a moment and check the result!",
            })
            return

        # Nothing to remove: report that instead of failing.
        self.finish({
            "status": "not exist",
            "message": "no need this operation, there is no such a container!",
        })
Exemple #2
0
class StopContainerHandler(APIHandler):
    """Handle POST requests asking for a container to be stopped."""

    container_opers = Container_Opers()

    @asynchronous
    def post(self):
        """Stop the container named by the ``containerName`` argument.

        Raises HTTPAPIError (status 417) when the argument is missing or the
        container does not exist. When the container's stat already equals
        ``Status.stopped`` no stop is issued and the current stat is
        returned.
        """
        arguments = self.get_all_arguments()
        logging.info('all_arguments: %s' % str(arguments))

        name = arguments.get('containerName')
        if not name:
            raise HTTPAPIError(
                status_code=417,
                error_detail="no container_name argument!",
                notification="direct",
                log_message="no container_name argument!",
                response="please check params!")

        if not self.container_opers.check_container_exists(name):
            not_exist = "container %s not exist!" % name
            raise HTTPAPIError(
                status_code=417,
                error_detail=not_exist,
                notification="direct",
                log_message=not_exist,
                response="please check!")

        current_stat = self.container_opers.get_container_stat(name)
        if current_stat == Status.stopped:
            # Already stopped: answer with the current stat, do nothing else.
            self.finish({
                "status": current_stat,
                "message": "no need this operation, the container has been stopped!",
            })
            return

        self.container_opers.stop(name)
        self.finish({
            "message": "due to stop a container need a little time, please wait and check the result~",
        })
Exemple #3
0
class Containers_Oom_Worker(Abstract_Async_Thread):
    """Worker thread that records the containers' OOM-related values."""

    container_opers = Container_Opers()

    def __init__(self, timeout=55):
        """Store ``timeout`` on the instance, then initialise the base thread."""
        self.timeout = timeout
        super(Containers_Oom_Worker, self).__init__()

    def run(self):
        """Record the resources when at least one cluster is listed.

        Any exception is pushed onto ``threading_exception_queue`` instead of
        propagating out of the thread.
        """
        try:
            if Scheduler_ZkOpers().retrieve_cluster_list():
                self.__action_record_containers_resource()
            else:
                logging.info('no cluster is created, no need to do this!')
        except Exception:
            self.threading_exception_queue.put(sys.exc_info())

    def __action_record_containers_resource(self):
        """Fetch each OOM item and hand it to write_containers_resource_to_zk."""
        logging.info('record containers under_oom, oom_kill_disable value')
        for item in ('under_oom', 'oom_kill_disable'):
            info = self.container_opers.get_containers_resource(item)
            self.container_opers.write_containers_resource_to_zk(item, info)
class ContainerCluster_create_Action(Base_ContainerCluster_create_Action):
    """Thread action that creates a container cluster from request args."""

    component_container_cluster_config_factory = ComponentContainerClusterConfigFactory(
    )
    container_opers = Container_Opers()

    def __init__(self, args):
        """Keep the request ``args`` (a dict-like object) for ``run``."""
        super(ContainerCluster_create_Action, self).__init__(args)
        self.args = args

    def run(self):
        """Run ``create`` and always report the outcome afterwards.

        The result defaults to ``Status.failed``. If ``create`` raises, the
        exception info is queued on ``threading_exception_queue`` and the
        exception text is passed on as the error message, so a failure is
        no longer reported with an empty message.
        """
        action_result = Status.failed
        error_message = ''
        cluster = self.args.get('containerClusterName')
        try:
            logging.debug('begin create')
            action_result = self.create(self.args)
        except Exception:
            exc_info = sys.exc_info()
            self.threading_exception_queue.put(exc_info)
            # Previously the message stayed '' even on failure.
            error_message = str(exc_info[1])
        finally:
            self.update_zk_info_when_process_complete(cluster, action_result,
                                                      error_message)

    def create(self, args):
        """Prepare the cluster config, record cluster info, then delegate.

        The generated container names and node count are stored on the
        config object, which is placed in ``args`` under
        ``'component_config'`` (only if that key is not already set).
        Returns whatever the parent class' ``create`` returns.
        """
        logging.info('args:%s' % str(args))
        _component_type = args.get('componentType')
        _network_mode = args.get('networkMode')
        _cluster = self.args.get('containerClusterName')

        _component_container_cluster_config = self.component_container_cluster_config_factory.retrieve_config(
            args)
        node_count = _component_container_cluster_config.nodeCount
        _component_container_cluster_config.sum_count = node_count
        container_names = self.container_opers.generate_container_names(
            _component_type, node_count, _cluster)
        _component_container_cluster_config.container_names = container_names
        args.setdefault('component_config',
                        _component_container_cluster_config)

        self.__create_cluser_info_to_zk(_network_mode, _component_type,
                                        _component_container_cluster_config)
        return super(ContainerCluster_create_Action, self).create(args)

    def __create_cluser_info_to_zk(self, network_mode, component_type,
                                   component_container_cluster_config):
        """Write the basic cluster description through Container_ZkOpers."""
        _container_cluster_info = {
            'containerCount': component_container_cluster_config.nodeCount,
            'containerClusterName':
                component_container_cluster_config.container_cluster_name,
            'type': component_type,
            # Every network mode other than 'bridge' is flagged as using IPs.
            'isUseIp': 'bridge' != network_mode,
        }
        zkOper = Container_ZkOpers()
        zkOper.write_container_cluster_info(_container_cluster_info)
Exemple #5
0
class SetContainerCpusharesHandler(APIHandler):
    """Handle POST requests that set a container's cpushares."""

    container_opers = Container_Opers()

    def post(self):
        """Pass all request arguments on and reply with the returned value."""
        outcome = self.container_opers.set_container_cpushares(
            self.get_all_arguments())
        self.finish(outcome)
Exemple #6
0
class CheckContainerStatusHandler(APIHandler):
    """Handle GET requests that check a single container."""

    container_opers = Container_Opers()

    @asynchronous
    def get(self, container_name):
        """Reply with the result of ``container_opers.check`` for the name."""
        self.finish(self.container_opers.check(container_name))
Exemple #7
0
class SetContainerCpusetHandler(APIHandler):
    """Handle POST requests that set a container's cpuset."""

    container_opers = Container_Opers()

    def post(self):
        """Pass all request arguments on and reply with the outcome.

        The outcome is returned under the ``"message"`` key. The previous
        code called ``result.setdefault(ret)``, which used the outcome as a
        dict *key* mapped to ``None`` and raised ``TypeError`` whenever the
        outcome was unhashable (e.g. a dict).
        """
        args = self.get_all_arguments()
        ret = self.container_opers.set_container_cpuset(args)

        # NOTE(review): "message" mirrors ManagerStatusHandler; confirm the
        # response shape expected by API clients.
        self.finish({"message": ret})
class CheckContainerStatusHandler(BaseContainerHandler):
    """Handle GET requests that check a container, via ``do``."""

    container_opers = Container_Opers()

    @asynchronous
    @engine
    def get(self, container_name):
        """Yield on ``do`` and send its result as the response."""
        checked = yield self.do(container_name)
        self.finish(checked)

    @run_on_executor()
    @run_callback
    def do(self, container_name):
        """Return ``container_opers.check`` for the given container name."""
        return self.container_opers.check(container_name)
Exemple #9
0
class ContainerResourceHandler(object):
    """Base class for container resource gatherers.

    Subclasses must implement ``gather``. The helpers here select which
    containers to gather from and index one document via ``ServerRes``.
    """

    con_op = Container_Opers()
    con_cache = ContainerCache()
    # Class-level dicts: shared by every instance and subclass unless
    # they are rebound.
    containers_diskio = {}
    containers_networkio = {}
    containers_cpuratio = {}

    def check_container_node_condition(self, container_node_detail):
        """Return True when the container passes the gathering checks.

        A missing (falsy) detail fails the check immediately; previously its
        attributes were read first, so ``None`` raised AttributeError.
        """
        if not container_node_detail:
            return False
        is_cluster_start = self.con_op.cluster_start(
            container_node_detail.cluster_name)
        is_container_name_legal = self.con_op.check_container_name_legal(
            container_node_detail.container_name)
        return bool(is_cluster_start and is_container_name_legal)

    def get_container_nodes(self):
        """Yield the details of the containers that need resource gathering.

        This spot was once suspected of exhausting memory, so the former
        local list ``container_nodes = []`` was dropped in favour of
        ``yield``.
        """
        # Iterate over a copy of the current ids.
        for con_id in self.con_cache.current_ids.copy():
            detail = self.con_cache.find_detail_by_id(con_id)
            # Ids already present in the previous cache were checked last
            # time; skipping the check for them saves the cost of
            # connecting to zookeeper and the like.
            if con_id not in self.con_cache.old_ids:
                # New id: run the pre-gathering checks and drop the
                # container when they fail.
                if not self.check_container_node_condition(detail):
                    detail = None
            if detail is not None:
                yield detail

    def write_to_es(self, resource_type, doc):
        """Index ``doc`` into a per-resource-type, per-day index.

        ``doc`` is updated in place with a ``'timestamp'`` key holding the
        current UTC time.
        """
        _now = datetime.utcnow()
        _date = _now.strftime('%Y%m%d')
        _index = "monitor_container_resource_{0}_{1}".format(
            resource_type, _date)
        doc.update({'timestamp': _now})
        ServerRes.index(index=_index, doc_type=resource_type, body=doc)

    def gather(self):
        """Gather resources; must be overridden by subclasses.

        Raises NotImplementedError. The former ``raise NotImplemented(...)``
        failed with TypeError because ``NotImplemented`` is not callable.
        """
        raise NotImplementedError("this gather method should be implemented")
Exemple #10
0
class ContainerHandler(APIHandler):
    """Handle POST requests that create a container."""

    container_opers = Container_Opers()
    component_docker_model_factory = ComponentDockerModelFactory()

    #@asynchronous
    def post(self):
        """Build a docker model from the arguments and create the container."""
        model = self.__create_docker_module(self.get_all_arguments())
        self.container_opers.create(model)
        self.finish({"message": "Success Create Container"})

    def __create_docker_module(self, arg_dict):
        """Log the incoming arguments and return the factory-built model."""
        logging.info('get create container args : %s, type:%s' % (str(arg_dict), type(arg_dict)) )
        return self.component_docker_model_factory.create(arg_dict)
Exemple #11
0
class GatherClusterResourceHandler(APIHandler):
    '''
        Return per-container resource values of a cluster (the result is
        what the webportal needs).
    '''

    container_opers = Container_Opers()

    def cluster_resoure(self, cluster, resource_type):
        """Collect ``resource_type`` for every container node of ``cluster``.

        Returns a list of dicts with the keys ``'value'``, ``'hostIp'`` and
        ``'containerName'``. Raises HTTPAPIError (status 417) when the
        cluster does not exist.
        """
        zk = Requests_ZkOpers()

        if not zk.check_containerCluster_exists(cluster):
            error_message = 'container cluster %s not exist, please check your cluster name' % cluster
            raise HTTPAPIError(status_code=417, error_detail=error_message,
                               notification="direct",
                               log_message=error_message,
                               response=error_message)

        collected = []
        for node in zk.retrieve_container_list(cluster):
            collected.append({
                'value': zk.retrieve_container_resource(
                    cluster, node, resource_type),
                'hostIp': self.container_opers.get_host_ip_from_zk(
                    cluster, node),
                'containerName': self.container_opers.get_container_name_from_zk(
                    cluster, node),
            })
        return collected

    @asynchronous
    @engine
    def get(self, cluster, resource_type):
        """Yield on ``do`` and send its result under the ``'data'`` key."""
        data = yield self.do(cluster, resource_type)
        self.finish({'data': data})

    @run_on_executor()
    @run_callback
    def do(self, cluster, resource_type):
        """Delegate to ``cluster_resoure``."""
        return self.cluster_resoure(cluster, resource_type)
class BaseContainerHandler(APIHandler):
    """Base handler answering GET requests for one container's resource."""

    container_opers = Container_Opers()

    def check_container_name(self, container_name):
        """Raise HTTPAPIError (status 417) when the container does not exist."""
        if self.container_opers.check_container_exists(container_name):
            return
        error_message = 'container %s not exist, please check your container name' % container_name
        raise HTTPAPIError(status_code=417,
                           error_detail=error_message,
                           notification="direct",
                           log_message=error_message,
                           response=error_message)

    def get_container_resource(self, container_name, resource_type):
        """Return ``{'value': ..., 'containerName': ...}`` for the container.

        The cluster name is derived from the container name, then the
        container's node name is resolved and the resource value is
        retrieved through Requests_ZkOpers.
        """
        zk = Requests_ZkOpers()

        cluster = get_containerClusterName_from_containerName(container_name)
        node = self.container_opers.get_container_node_from_container_name(
            cluster, container_name)
        value = zk.retrieve_container_resource(cluster, node, resource_type)

        return {'value': value, 'containerName': container_name}

    @asynchronous
    @engine
    def get(self, container_name, resource_type):
        """Yield on ``do`` and send its result as the response."""
        payload = yield self.do(container_name, resource_type)
        self.finish(payload)

    @run_on_executor()
    @run_callback
    def do(self, container_name, resource_type):
        """Validate the container name, then fetch the requested resource."""
        self.check_container_name(container_name)
        return self.get_container_resource(container_name, resource_type)
Exemple #13
0
class ManagerStatusHandler(APIHandler):
    """Handle POST requests that validate a container's manager status."""

    container_opers = Container_Opers()

    @asynchronous
    def post(self):
        """
            Reply with the manager status validation result under "message".

            Raises HTTPAPIError (status 417) when containerName or
            componentType is missing.

            eg. curl --user root:root -d  "containerName=d-mcl-zz2-n-3&componentType=mcluster" /container/manager/status
        """
        arguments = self.get_all_arguments()
        container_name = arguments.get('containerName')
        component_type = arguments.get('componentType')
        if not container_name or not component_type:
            raise HTTPAPIError(
                status_code=417,
                error_detail="no containerName or componentType argument!",
                notification="direct",
                log_message="no containerName or componentType argument!",
                response="please check params!")

        validation = self.container_opers.manager_status_validate(
            component_type, container_name)
        self.finish({"message": validation})
class Base_ContainerCluster_Action(Abstract_Async_Thread):
    """if param "containers" not given, the action is about cluster;

    if param "containers" are given, the action is about such containers.
    """

    container_opers = Container_Opers()

    def __init__(self, containerClusterName, action, containers=None):
        """Remember the cluster name, the action and the optional containers."""
        super(Base_ContainerCluster_Action, self).__init__()
        self.cluster = containerClusterName
        self.action = action
        self.containers = containers

    def run(self):
        """Issue the action; failures go to ``threading_exception_queue``."""
        try:
            self.__issue_action()
        except Exception:
            self.threading_exception_queue.put(sys.exc_info())

    def __issue_action(self):
        """POST the action for every targeted container to its host.

        For the 'remove' action, waits until every targeted container is
        reported destroyed and then runs ``do_when_remove_cluster``.
        """
        params = self.__get_params()
        adminUser, adminPasswd = _retrieve_userName_passwd()
        logging.info('params: %s' % str(params))

        # Names from every host. The loop variable alone used to be checked
        # afterwards, which covered only the last host and was unbound
        # (NameError) when params was empty.
        all_container_names = []

        async_client = AsyncHTTPClient()
        try:
            for host_ip, container_name_list in params.items():
                logging.info('container_name_list %s in host %s ' %
                             (str(container_name_list), host_ip))
                all_container_names.extend(container_name_list)
                for container_name in container_name_list:
                    args = {'containerName': container_name}
                    request_uri = 'http://%s:%s/container/%s' % (
                        host_ip, options.port, self.action)
                    logging.info('post-----  url: %s, \n body: %s' %
                                 (request_uri, str(args)))
                    async_http_post(async_client,
                                    request_uri,
                                    body=args,
                                    auth_username=adminUser,
                                    auth_password=adminPasswd)
        finally:
            async_client.close()

        # Nothing was targeted: there is nothing to wait for or clean up.
        if (self.action == 'remove' and all_container_names and
                self._check_is_cluster_destroyed(all_container_names)):
            self.do_when_remove_cluster()

    def _check_is_cluster_destroyed(self, container_name_list):
        """Poll until all listed containers are reported destroyed.

        Returns True as soon as one polling round finds every container
        destroyed (a container without status info counts as destroyed),
        False after 30 rounds with a 2 second sleep after each one.
        """
        attempts = 30
        for _ in range(attempts):
            if all(self.__is_destroyed(name) for name in container_name_list):
                return True
            time.sleep(2)
        return False

    def __is_destroyed(self, container_name):
        """Return True unless status info exists and is not 'destroyed'."""
        stats = self.container_opers.retrieve_container_status_from_containerName(
            container_name)
        return not (stats and stats.get('status') != Status.destroyed)

    def do_when_remove_cluster(self):
        """Hand the cluster's container list to recover_ips_to_pool when the cluster info has 'isUseIp' set."""
        zkOper = Container_ZkOpers()
        cluster_info = zkOper.retrieve_container_cluster_info(self.cluster)
        use_ip = cluster_info.get('isUseIp')
        if use_ip:
            container_ip_list = zkOper.retrieve_container_list(self.cluster)
            logging.info('container_ip_list:%s' % str(container_ip_list))
            zkOper.recover_ips_to_pool(container_ip_list)

    def __get_params(self):
        """
            Return a dict mapping host_ip to the container names on it.

            two containers may be with a host_ip
        """
        zkOper = Container_ZkOpers()
        if self.containers:
            container_nodes = [
                self.container_opers.get_container_node_from_container_name(
                    self.cluster, container)
                for container in self.containers
            ]
        else:
            container_nodes = zkOper.retrieve_container_list(self.cluster)
        self.container_nodes = container_nodes

        params = {}
        for container_node in self.container_nodes:
            container_info = zkOper.retrieve_container_node_value(
                self.cluster, container_node)
            container_name = container_info.get('containerName')
            host_ip = container_info.get('hostIp')
            # Newest name first, followed by the names already collected
            # for this host (same ordering as before).
            params[host_ip] = [container_name] + params.get(host_ip, [])
        return params
class Server_Res_Opers():
    '''
    Collects server-level resource figures (memory, disk, load average,
    CPU ratio, container count).

    TODO: separate resource gathering from writing, plan the interfaces,
    and refactor?
    '''

    _logger = logging.getLogger("process_info")
    _logger.setLevel(logging.INFO)

    docker_opers = Docker_Opers()
    container_opers = Container_Opers()
    _server_cpu_ratio = CPURatio()

    def __init__(self, container_name=""):
        """Store the container name; when it is non-empty also load the
        top command result and the container id/pid mapping."""
        self.name = container_name
        if self.name != "":
            self.matrix_list = self.get_top_cmd_ret()
            self.id_pid_dict = self.get_container_id_pid_dict(self.name)

    def container_count(self):
        """Return the number of containers reported by container_opers."""
        return len(self.container_opers.get_all_containers())

    def memory_stat(self):
        """Return ``{'total', 'used', 'free'}`` computed from /proc/meminfo.

        Each value read from the file is multiplied by 1024. ``free`` is
        MemFree + Buffers + Cached; ``used`` is MemTotal minus that sum.
        """
        mem = {}
        # ``with`` closes the file even if reading fails.
        with open("/proc/meminfo", "r") as f:
            for line in f:
                name, sep, rest = line.partition(':')
                fields = rest.split()
                # Skip blank or malformed lines instead of raising
                # IndexError.
                if not sep or not fields:
                    continue
                mem[name] = int(fields[0]) * 1024.0

        reclaimable = mem['MemFree'] + mem['Buffers'] + mem['Cached']
        return {
            'total': int(mem['MemTotal']),
            'used': int(mem['MemTotal'] - reclaimable),
            'free': int(reclaimable),
        }

    def disk_iops(self):
        """Return ``diskio.iops`` for the monitored mount points."""
        mountpoints = ('/srv/docker/vfs', '/srv')
        return diskio.iops(mountpoints)

    def srv_disk_stat(self):
        """
        Return ``disk_stat`` for /srv/docker/vfs.

        @todo: monitor all disks and partitions
        """
        return disk_stat('/srv/docker/vfs')

    def disk_loadavg(self):
        """Return the first five fields of /proc/loadavg as strings.

        Keys: ``lavg_1``, ``lavg_5``, ``lavg_15``, ``nr``, ``last_pid``.
        """
        with open("/proc/loadavg", "r") as f:
            con = f.read().split()
        loadavg = {
            'lavg_1': con[0],
            'lavg_5': con[1],
            'lavg_15': con[2],
            'nr': con[3],
            'last_pid': con[4],
        }
        self._logger.info("disk io information: " + str(loadavg))
        return loadavg

    def cpu_ratio(self):
        """Return the result of the shared CPURatio object."""
        return self._server_cpu_ratio.get_result()

    @property
    def server_cpu_ratio(self):
        """The shared CPURatio object itself."""
        return self._server_cpu_ratio