def get_docker_containers(user_list=None, host_namespace=''):
    """
    Get the list of running Docker containers, as `DockerContainer` objects.

    This is basically polling. Ideally, we should subscribe to Docker
    events so we can keep the containers list up to date without having to
    poll like this.

    :param host_namespace: string representing the host name (e.g. host IP)
    :param user_list: `None` (or 'ALL'/'all'/'All') means all containers;
        otherwise either a comma-separated string of container IDs or a
        list of container IDs to restrict the result to.
    :return: a generator of DockerContainer objects
    """
    for inspect in exec_dockerps():
        long_id = inspect['Id']

        if user_list not in ['ALL', 'all', 'All', None]:
            # The docstring advertises a list of IDs, but historically a
            # comma-separated string was passed; accept both instead of
            # crashing with AttributeError on a real list.
            if isinstance(user_list, str):
                requested_ids = user_list.split(',')
            else:
                requested_ids = user_list
            # Compare on the 12-char short id so callers may pass either
            # short or long container IDs.
            user_ctrs = [cid[:12] for cid in requested_ids]
            short_id = long_id[:12]
            if short_id not in user_ctrs:
                continue

        try:
            c = DockerContainer(long_id, inspect=inspect,
                                host_namespace=host_namespace)
            # Skip containers whose namespace could not be determined.
            if c.namespace:
                yield c
        except ContainerInvalidEnvironment as e:
            # Best-effort: log and move on to the next container.
            logger.exception(e)
# Example #2
def get_docker_containers(user_list=None, host_namespace=''):
    """
    Get the list of running Docker containers, as `DockerContainer` objects.

    This is basically polling. Ideally, we should subscribe to Docker
    events so we can keep the containers list up to date without having to
    poll like this.

    :param host_namespace: string representing the host name (e.g. host IP)
    :param user_list: `None` (or 'ALL'/'all'/'All') means all containers;
        otherwise either a comma-separated string of container IDs or a
        list of container IDs to restrict the result to.
    :return: a generator of DockerContainer objects
    """
    for inspect in exec_dockerps():
        long_id = inspect['Id']

        if user_list not in ['ALL', 'all', 'All', None]:
            # The docstring advertises a list of IDs, but historically a
            # comma-separated string was passed; accept both instead of
            # crashing with AttributeError on a real list.
            if isinstance(user_list, str):
                requested_ids = user_list.split(',')
            else:
                requested_ids = user_list
            # Compare on the 12-char short id so callers may pass either
            # short or long container IDs.
            user_ctrs = [cid[:12] for cid in requested_ids]
            short_id = long_id[:12]
            if short_id not in user_ctrs:
                continue

        try:
            c = DockerContainer(long_id,
                                inspect=inspect,
                                host_namespace=host_namespace)
            # Skip containers whose namespace could not be determined.
            if c.namespace:
                yield c
        except ContainerInvalidEnvironment as e:
            # Best-effort: log and move on to the next container.
            logger.exception(e)
# Example #3
    def _crawl_in_system(self):
        '''
        Yield one (key, metrics-dict, 'gpu') tuple per installed GPU.

        nvidia-smi returns following: MEMORY, UTILIZATION, ECC, TEMPERATURE,
        POWER, CLOCK, COMPUTE, PIDS, PERFORMANCE, SUPPORTED_CLOCKS,
        PAGE_RETIREMENT, ACCOUNTING

        currently, following are requested based on dlaas requirements:
            utilization.gpu, utilization.memory,
            memory.total, memory.free, memory.used
        nvidia-smi --query-gpu=utilization.gpu,utilization.memory,\
            memory.total,memory.free,memory.used --format=csv,noheader,nounits
        '''

        # -1 signals NVML could not be initialised; nothing to crawl.
        if self._init_nvml() == -1:
            return

        self.inspect_arr = exec_dockerps()

        num_gpus = pynvml.nvmlDeviceGetCount()

        try:
            for gpuid in range(0, num_gpus):
                gpuhandle = pynvml.nvmlDeviceGetHandleByIndex(gpuid)
                temperature = pynvml.nvmlDeviceGetTemperature(
                    gpuhandle, pynvml.NVML_TEMPERATURE_GPU)
                memory = pynvml.nvmlDeviceGetMemoryInfo(gpuhandle)
                # NVML reports memory in bytes; convert to MiB.
                mem_total = memory.total / 1024 / 1024
                mem_used = memory.used / 1024 / 1024
                mem_free = memory.free / 1024 / 1024
                # NVML reports power in milliwatts; convert to watts.
                power_draw = pynvml.nvmlDeviceGetPowerUsage(gpuhandle) / 1000
                power_limit = pynvml.nvmlDeviceGetEnforcedPowerLimit(
                    gpuhandle) / 1000
                util = pynvml.nvmlDeviceGetUtilizationRates(gpuhandle)
                util_gpu = util.gpu
                util_mem = util.memory
                entry = {
                    'utilization': {
                        'gpu': util_gpu,
                        'memory': util_mem
                    },
                    'memory': {
                        'total': mem_total,
                        'free': mem_free,
                        'used': mem_used
                    },
                    'temperature': temperature,
                    'power': {
                        'draw': power_draw,
                        'limit': power_limit
                    }
                }
                key = self._get_feature_key(gpuhandle, gpuid)
                yield (key, entry, 'gpu')
        finally:
            # The original shut NVML down only after the last GPU, leaking
            # the NVML session if a consumer abandoned the generator early
            # or any NVML call raised.  A finally clause runs in all of
            # those cases (including GeneratorExit on close()).
            self._shutdown_nvml()
    def crawl(self, **kwargs):
        """Yield one (container id, DockerPSFeature, 'dockerps') tuple per container."""
        logger.debug('Crawling %s' % (self.get_feature()))

        for info in exec_dockerps():
            container_id = info['Id']
            feature = DockerPSFeature._make([
                info['State']['Running'],
                0,
                info['Image'],
                [],
                info['Config']['Cmd'],
                info['Name'],
                container_id,
            ])
            yield (container_id, feature, 'dockerps')
# Example #5
    def crawl(self, **kwargs):
        """Emit a (id, DockerPSFeature, 'dockerps') triple for each running container."""
        logger.debug('Crawling %s' % (self.get_feature()))

        for ps_entry in exec_dockerps():
            fields = [
                ps_entry['State']['Running'],
                0,
                ps_entry['Image'],
                [],
                ps_entry['Config']['Cmd'],
                ps_entry['Name'],
                ps_entry['Id'],
            ]
            yield (ps_entry['Id'], DockerPSFeature._make(fields), 'dockerps')
    def _crawl_in_system(self):
        '''
        Yield one (key, metrics-dict, 'gpu') tuple per installed GPU.

        nvidia-smi returns following: MEMORY, UTILIZATION, ECC, TEMPERATURE,
        POWER, CLOCK, COMPUTE, PIDS, PERFORMANCE, SUPPORTED_CLOCKS,
        PAGE_RETIREMENT, ACCOUNTING

        currently, following are requested based on dlaas requirements:
            utilization.gpu, utilization.memory,
            memory.total, memory.free, memory.used
        nvidia-smi --query-gpu=utilization.gpu,utilization.memory,\
            memory.total,memory.free,memory.used --format=csv,noheader,nounits
        '''

        # -1 signals NVML could not be initialised; nothing to crawl.
        if self._init_nvml() == -1:
            return

        self.inspect_arr = exec_dockerps()

        num_gpus = pynvml.nvmlDeviceGetCount()

        try:
            for gpuid in range(0, num_gpus):
                gpuhandle = pynvml.nvmlDeviceGetHandleByIndex(gpuid)
                temperature = pynvml.nvmlDeviceGetTemperature(
                    gpuhandle, pynvml.NVML_TEMPERATURE_GPU)
                memory = pynvml.nvmlDeviceGetMemoryInfo(gpuhandle)
                # NVML reports memory in bytes; convert to MiB.
                mem_total = memory.total / 1024 / 1024
                mem_used = memory.used / 1024 / 1024
                mem_free = memory.free / 1024 / 1024
                # NVML reports power in milliwatts; convert to watts.
                power_draw = pynvml.nvmlDeviceGetPowerUsage(gpuhandle) / 1000
                power_limit = pynvml.nvmlDeviceGetEnforcedPowerLimit(
                    gpuhandle) / 1000
                util = pynvml.nvmlDeviceGetUtilizationRates(gpuhandle)
                util_gpu = util.gpu
                util_mem = util.memory
                entry = {
                    'utilization': {'gpu': util_gpu, 'memory': util_mem},
                    'memory': {'total': mem_total, 'free': mem_free,
                               'used': mem_used},
                    'temperature': temperature,
                    'power': {'draw': power_draw, 'limit': power_limit}
                }
                key = self._get_feature_key(gpuhandle, gpuid)
                yield (key, entry, 'gpu')
        finally:
            # The original shut NVML down only after the last GPU, leaking
            # the NVML session if a consumer abandoned the generator early
            # or any NVML call raised.  A finally clause runs in all of
            # those cases (including GeneratorExit on close()).
            self._shutdown_nvml()
# Example #7
 def test_dockerps(self):
     """The long id reported by exec_dockerps should match the test container's."""
     for ps_entry in exec_dockerps():
         observed_long_id = ps_entry['Id']
         break  # there should only be one container anyway
     assert self.container['Id'] == observed_long_id
 def test_dockerps(self):
     """exec_dockerps must report the id of the single running test container."""
     for entry in exec_dockerps():
         reported_id = entry['Id']
         break  # there should only be one container anyway
     assert self.container['Id'] == reported_id