    def _start_mongo_db(self):
        """Start MongoDB service.

        From https://stackoverflow.com/a/53522699/13173608.
        """
        mongo_name = f'mongo-{random.randint(0, 100000)}'

        # run the official `mongo` image, publishing the container's 27017/tcp port on
        # self.mongo_port and labelling it with that port so it can be located later
        self.docker_client.containers.run(
            'mongo', ports={'27017/tcp': self.mongo_port}, name=mongo_name,
            environment={
                'MONGO_INITDB_USERNAME': MONGO_USERNAME,
                'MONGO_INITDB_PASSWORD': MONGO_PASSWORD,
                'MONGO_INITDB_DATABASE': MONGO_DB,
            },
            labels={**self.common_labels, MODELCI_DOCKER_PORT_LABELS['mongo']: str(self.mongo_port)},
            **self.extra_container_kwargs
        )

        # give mongod a moment to start accepting connections
        time.sleep(1)
        try:
            # create MongoDB user
            client = MongoClient(f'{MONGO_HOST}:{MONGO_PORT}')
            kwargs = {'pwd': MONGO_PASSWORD, 'roles': [{'role': 'readWrite', 'db': MONGO_DB}]}
            getattr(client, MONGO_DB).command("createUser", MONGO_USERNAME, **kwargs)
        except Exception as e:
            self.logger.error(f'Exception while starting MongoDB: {e}')
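            # tear down the partially started MongoDB container before giving up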
            container = list_containers(self.docker_client, filters={'name': mongo_name})[0]
            container.kill()
            container.remove()
            return

        check_container_status(self.docker_client, name=mongo_name)
        self.logger.info(f'Container name={mongo_name} started.')
    def _start_gpu_metrics_node_exporter(self):
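        """Start GPU metrics exporter service."""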
        rand_num = random.randint(0, 100000)
        gpu_metrics_name = f'gpu-metrics-exporter-{rand_num}'
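        # find the running dcgm-exporter container so its metrics volume can be
        # shared with this exporter via volumes_from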
        dcgm_container = list_containers(
            self.docker_client,
            filters={
                'label': [MODELCI_DOCKER_PORT_LABELS['dcgm_node_exporter']]
            })[0]
        # start gpu-metric-exporter
        self.docker_client.containers.run(
            'bgbiao/gpu-metrics-exporter',
            privileged=True,
            name=gpu_metrics_name,
            ports={'9400/tcp': self.node_exporter_port},
            volumes_from=[dcgm_container.id],
            labels={
                **self.common_labels,
                MODELCI_DOCKER_PORT_LABELS['gpu_metrics_node_exporter']: str(self.node_exporter_port),
                MODELCI_GPU_LABEL: str(self.enable_gpu),
            },
            **self.extra_container_kwargs
        )

        check_container_status(self.docker_client, name=gpu_metrics_name)
        self.logger.info(f'Container name={gpu_metrics_name} started.')

    def _start_cadvisor(self):
        """Start cAdvisor service."""
        cadvisor_name = f'cadvisor-{random.randint(0, 100000)}'

        # host paths cAdvisor needs to collect container metrics; everything except
        # /var/run is mounted read-only
        volumes = {
            '/': {'bind': '/rootfs', 'mode': 'ro'},
            '/var/run': {'bind': '/var/run', 'mode': 'rw'},
            '/sys': {'bind': '/sys', 'mode': 'ro'},
            '/var/lib/docker': {'bind': '/var/lib/docker', 'mode': 'ro'},
        }

        extra_container_kwargs = self.extra_container_kwargs.copy()

        if self.enable_gpu:
            # find libnvidia-ml.so.1
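            # reuse a previously discovered path if cached, to avoid shelling out to `locate`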
            cache_file = Path('/tmp/libnvidia-ml.cache')
            if cache_file.exists():
                with open(cache_file) as f:
                    libnvidia_ml_path = f.read().strip()
            else:
                args1 = ('locate', 'libnvidia-ml.so.1')
                args2 = ('grep', '-v', 'lib32')
                args3 = ('head', '-1')
                locate = subprocess.Popen(args1, stdout=subprocess.PIPE)
                grep = subprocess.Popen(args2, stdin=locate.stdout, stdout=subprocess.PIPE)
                # close our copy of locate's stdout so locate gets SIGPIPE if grep exits early
                locate.stdout.close()
                libnvidia_ml_path = subprocess.check_output(
                    args3, stdin=grep.stdout, universal_newlines=True
                ).strip()
                grep.stdout.close()
                locate.wait()
                grep.wait()

                # save to cache
                with open(cache_file, 'w') as f:
                    f.write(libnvidia_ml_path)

            # mount the NVIDIA management library into the container and point
            # LD_LIBRARY_PATH at its directory so cAdvisor can query GPU metrics via NVML
            volumes.update({libnvidia_ml_path: {'bind': libnvidia_ml_path}})
            extra_container_kwargs.update({'environment': {'LD_LIBRARY_PATH': str(Path(libnvidia_ml_path).parent)}})

        self.docker_client.containers.run(
            'google/cadvisor:latest', name=cadvisor_name, ports={'8080/tcp': self.cadvisor_port},
            privileged=True, volumes=volumes,
            labels={
                **self.common_labels,
                MODELCI_DOCKER_PORT_LABELS['cadvisor']: str(self.cadvisor_port),
                MODELCI_GPU_LABEL: str(self.enable_gpu),
            },
            **extra_container_kwargs
        )

        check_container_status(self.docker_client, name=cadvisor_name)
        self.logger.info(f'Container name={cadvisor_name} started.')

    def _start_dcgm_node_exporter(self):
        """Start DCGM node exporter service."""
        rand_num = random.randint(0, 100000)

        dcgm_name = f'dcgm-exporter-{rand_num}'
        # start dcgm-exporter
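        # it requires the NVIDIA runtime and publishes no host port (port label '-1');
        # its output volume is consumed by the gpu-metrics-exporter via volumes_from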
        self.docker_client.containers.run(
            'bgbiao/dcgm-exporter', runtime='nvidia', name=dcgm_name,
            labels={
                **self.common_labels,
                MODELCI_DOCKER_PORT_LABELS['dcgm_node_exporter']: '-1',
                MODELCI_GPU_LABEL: str(self.enable_gpu),
            },
            **self.extra_container_kwargs
        )

        check_container_status(self.docker_client, dcgm_name)
        self.logger.info(f'Container name={dcgm_name} started.')
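    # Hypothetical usage sketch (not part of the original code): a composite start method
    # could bring the monitoring stack up in dependency order. Note that
    # _start_gpu_metrics_node_exporter looks up the dcgm-exporter container by label,
    # so the DCGM exporter would have to be running first.
    #
    #     def start(self):
    #         self._start_mongo_db()
    #         self._start_cadvisor()
    #         if self.enable_gpu:
    #             self._start_dcgm_node_exporter()
    #             self._start_gpu_metrics_node_exporter()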