Esempio n. 1
0
 def reload(self):
     """
     Refresh ``self.inspection`` from podman, unconditionally.

     A fresh API connection is opened for the call. When podman raises
     ``InternalServerError``, the failure is logged and
     ``self.inspection`` is not assigned.
     """
     try:
         with podman.ApiConnection(podman_url) as api:
             details = podman.containers.inspect(api, self.name)
             self.inspection = details
     except podman.errors.InternalServerError:
         logger.error(f"error 500 with {self.name}")
Esempio n. 2
0
    def _gather_system_facts(self, figures_by_course):
        """
        Probe the host for disk space, CPU load and memory figures.

        Parameters:
          figures_by_course: not used by this method

        Returns:
          a tuple (disk_spaces, loads, memory) where
          * disk_spaces maps 'container', 'nbhosting' and 'system'
            to a dict with keys 'percent' (free blocks as a percentage
            of all blocks, rounded) and 'free' (free space in MiB, rounded)
          * loads has keys 'load1', 'load5' and 'load15', each being
            100 times the corresponding load average, rounded
          * memory has keys 'memory_total', 'memory_free' and
            'memory_available', in bytes

        Each probe is best-effort: a failure gets logged and the
        corresponding figures are reported as 0.
        """
        # the podman graph root is looked up once, then cached on self
        if self._graphroot is None:
            with podman.ApiConnection(podman_url) as podman_api:
                self._graphroot = podman.system.info(podman_api)['store']['graphRoot']
        nbhroot = sitesettings.nbhroot
        system_root = "/"
        disk_spaces = {}
        for name, root in (('container', self._graphroot),
                           ('nbhosting', nbhroot),
                           ('system', system_root)):
            disk_spaces[name] = {}
            try:
                stat = os.statvfs(root)
                disk_spaces[name]['percent'] = round(100 * stat.f_bfree / stat.f_blocks)
                # unit is MiB
                disk_spaces[name]['free'] = round((stat.f_bfree * stat.f_bsize) / (1024**2))

            except Exception:
                disk_spaces[name]['free'] = 0
                disk_spaces[name]['percent'] = 0
                logger.exception(
                    f"monitor cannot compute disk space {name} on {root}")

        # loads
        try:
            # os.getloadavg() returns the 1, 5 and 15 minutes load averages
            # without forking a process, and without parsing
            # a human-oriented output whose format depends on the locale
            load1, load5, load15 = [round(100 * load) for load in os.getloadavg()]

        except Exception:
            load1, load5, load15 = 0, 0, 0
            logger.exception("monitor cannot compute cpu loads")

        loads = dict(load1=load1, load5=load5, load15=load15)

        # memory from /proc/meminfo
        # the lines of interest are located from their label,
        # so that we do not depend on the order of the lines
        wanted = {
            'MemTotal:': 'memory_total',
            'MemFree:': 'memory_free',
            'MemAvailable:': 'memory_available',
        }
        # a label that does not show up in the file is reported as 0
        memory = dict.fromkeys(wanted.values(), 0)
        try:
            def handle_line(line):
                _label, value, unit = line.split()
                if unit == 'kB':
                    return int(value) * 1024
                logger.warning(f"unexpected unit {unit} in meminfo")
                return 0
            still_missing = len(wanted)
            with open("/proc/meminfo") as feed:
                for line in feed:
                    label = line.split(None, 1)[0] if line.strip() else None
                    if label not in wanted:
                        continue
                    memory[wanted[label]] = handle_line(line)
                    still_missing -= 1
                    if still_missing == 0:
                        break
        except Exception:
            logger.exception("failed to probe memory")
            # on failure all 3 figures are reset, even the ones already parsed
            memory = dict.fromkeys(wanted.values(), 0)

        return disk_spaces, loads, memory
Esempio n. 3
0
    def _scan_containers(self, figures_by_course):
        """
        Inspect all the containers known to podman, and run the
        monitoring coroutine on the ones that belong to a course.

        A container is considered when its first name reads
        <coursename>-x-<student> and <coursename> is a known course;
        the other ones are logged and ignored.

        Parameters:
          figures_by_course: a dict coursename -> CourseFigures;
            an entry is created for each course found in a container name
            that has none yet

        Side effects:
          sets self.system_containers to the number of monitored containers
          and self.system_kernels to the sum of their nb_kernels
          (None being counted as 0)
        """

        logger.info(f"monitor cycle with period={self.period//60}' "
                    f"idle={self.idle//60}' "
                    f"lingering={self.lingering//3600}h")
        # the image hash of each known course
        hash_by_course = {c.coursename : c.image_hash()
                          for c in CourseDir.objects.all()}

        with podman.ApiConnection(podman_url) as podman_api:
            # returns None when no container is found !
            containers = podman.containers.list_containers(podman_api, all=True) or []
        logger.info(f"found {len(hash_by_course)} courses "
                    f"and {len(containers)} containers")


        monitoreds = []
        for container in containers:
            # reset at each iteration, so that the KeyError handler below
            # can tell whether the container name could be parsed at all,
            # and never reports the course of a previous container
            coursename = None
            try:
                name = container['Names'][0]
                coursename, student = name.split('-x-')
                figures_by_course.setdefault(coursename, CourseFigures())
                figures = figures_by_course[coursename]
                # may be None if s/t is misconfigured
                image_hash = hash_by_course[coursename] \
                       or f"hash not found for course {coursename}"
                monitoreds.append(MonitoredJupyter(
                    container, coursename, student,
                    figures, image_hash))
            # typically non-nbhosting containers
            except ValueError:
                # ignore this container as we don't even know
                # in what course it belongs
                logger.info(f"ignoring non-nbhosting {container}")
            except KeyError:
                if coursename is None:
                    # the container structure itself has a missing key
                    logger.exception(f"monitor has to ignore {container}")
                else:
                    # typically hash_by_course[coursename] is failing
                    # this may happen when a course gets outdated
                    logger.info(f"ignoring container {container} - "
                                f"can't find image hash for {coursename}")
            except Exception:
                logger.exception(f"monitor has to ignore {container}")

        # run the whole stuff: one coroutine per monitored container,
        # all of them being awaited together
        futures = [mon.co_run(self.idle, self.lingering)
                   for mon in monitoreds]

        asyncio.get_event_loop().run_until_complete(
            asyncio.gather(*futures))

        self.system_containers = len(monitoreds)
        self.system_kernels = sum((mon.nb_kernels or 0) for mon in monitoreds)
Esempio n. 4
0
    def kill_running_containers(self, *, containers=None, background=False):
        """
        Kill a set of containers, all through one single podman connection.

        Parameters:
          containers: the containers to kill; each one is a structure
            whose ['Names'][0] is the container name.
            When omitted, self.spot_running_containers() is called
            to compute them
          background: meant to choose between returning immediately
            (background=True) and waiting until the containers are
            actually killed (background=False);
            NOTE that the code below does not read this parameter
        """
        targets = containers
        if targets is None:
            targets = self.spot_running_containers()

        with podman.ApiConnection(podman_url) as api:
            for target in targets:
                container_name = target['Names'][0]
                podman.containers.kill(api, container_name)
Esempio n. 5
0
 def spot_running_containers(self):
     """
     returns a list of containers that are currently
     running under this student's name

     a container is deemed to belong to this student when
     its first name ends with -x-<self.name>;
     the result is an empty list when no container is running
     """

     terminator = f"-x-{self.name}"

     with podman.ApiConnection(podman_url) as podman_api:
         # not specifying all=True means only the running ones
         # list_containers may return None rather than an empty list
         # when no container is found, hence the 'or []'
         containers = podman.containers.list_containers(podman_api) or []
     # keep only this student's containers
     return [
         container for container in containers
         if container['Names'][0].endswith(terminator)
     ]
Esempio n. 6
0
        def show_course(cd, max_name, max_image, max_groups):
            """
            Build the line of text that describes course `cd`.

            The level of detail depends on `list_flag`, which is
            taken from the enclosing scope:
            * 0: the course name only
            * 1: name, image and flags ([AP] autopull, [AB] autobuild,
              [AR] archived)
            * 2 and above: also the current hash, the groups and the git url
            * 3 and above: podman is also asked whether the image exists;
              if not, the image name is surrounded with '!' and,
              for the complete line, the text is wrapped in the
              escape sequences ESC[1m ESC[31m ... ESC[0m

            max_name, max_image and max_groups are the widths used
            to align the corresponding columns (one more character
            is added to each of them).
            """
            name_spec = f"{max_name+1}s"
            groups_spec = f"{max_groups+1}s"

            marks = ""
            if cd.autopull:
                marks += "[AP]"
            if cd.autobuild:
                marks += "[AB]"
            if cd.archived:
                marks += "[AR]"
            flags_part = f"{marks:13s}"
            hash_part = f"{cd.current_hash():9s}"
            groups_part = format(groups(cd), groups_spec)
            image = cd.image

            name_part = format(cd.coursename, name_spec)
            if list_flag == 0:
                return name_part

            # remains None unless podman gets actually probed
            image_exists = None
            if list_flag >= 3:
                import podman
                podman_url = "unix://localhost/run/podman/podman.sock"
                with podman.ApiConnection(podman_url) as api:
                    image_exists = podman.images.image_exists(api, cd.image)
                warning = " " if image_exists else "!"
                image = f"{warning}{image}{warning}"
                # the image column is 2 characters wider because of the markers
                max_image += 2
            image_part = format(image, f"{max_image+1}s")

            if list_flag == 1:
                return name_part + image_part + flags_part

            line = "".join([
                name_part, image_part, flags_part,
                hash_part, groups_part, f"{cd.giturl}",
            ])
            if image_exists is False:
                escape = chr(27)
                line = f"{escape}[1m{escape}[31m{line}{escape}[0m"
            return line
Esempio n. 7
0
 def remove_container(self):
     """
     Ask podman to remove the container named ``self.name``,
     through a connection opened for that single call.
     """
     with podman.ApiConnection(podman_url) as api:
         container_name = self.name
         podman.containers.remove(api, container_name)
Esempio n. 8
0
 def kill_container(self):
     """
     Ask podman to kill the container named ``self.name``.
     """
     # a connection is created for each call on purpose:
     # this turns out much more robust than reusing one
     with podman.ApiConnection(podman_url) as api:
         container_name = self.name
         podman.containers.kill(api, container_name)
Esempio n. 9
0
 def test_000_ctor(self):
     """
     An ApiConnection built on "unix:///" can be entered
     and exited as a context manager.
     """
     with podman.ApiConnection("unix:///"):
         pass
Esempio n. 10
0
 def test_001_join(self):
     """
     join() appends the path and the query string
     to the base of the connection.
     """
     with podman.ApiConnection("unix:///") as connection:
         joined = connection.join("/unittest", {"q": "p"})
         expected = "{}/unittest?q=p".format(connection.base)
         self.assertEqual(expected, joined)
Esempio n. 11
0
    def _run_once(self, show_details, show_idle):
        """
        Print a summary of the containers known to podman.

        The total number of containers is split like this:
        * total = stopped + running
          running = idle (0 kernels) + active (>= 1 kernel)

        A first line (ALL) deals with all the containers; when
        self.patterns is set, a second line (SEL) deals with
        the ones for which self.in_scope() is true.

        Parameters:
          show_details: if True, print one line per container
            with last activity and # of kernels
          show_idle: if True, compute the number of containers
            that have no kernel
        """

        with podman.ApiConnection(podman_url) as api:
            # all=True is required to see the stopped containers too;
            # without it only the running ones are listed, and the
            # 'stopped' figures below would always be 0.
            # also the call may return None when no container is found
            containers = podman.containers.list_containers(api, all=True) or []

        all_running = [c for c in containers if c['State'] == 'running']
        all_stopped = [c for c in containers if c['State'] != 'running']

        def monitored(container):
            # NOTE: raises ValueError on a container whose name is not
            # made of exactly 2 parts separated by '-x-'
            name = container['Names'][0]
            course, student = name.split('-x-')
            # create one figures instance per container
            figures = CourseFigures()
            return MonitoredJupyter(container, course, student, figures, None)

        running_monitoreds = [
            monitored(container) for container in all_running
        ]

        if show_details or show_idle:
            # probe them to fill last_activity and nb_kernels
            futures = [
                mon.count_running_kernels() for mon in running_monitoreds
            ]
            # the probes are run one after the other
            for future in futures:
                loop.run_until_complete(future)

        if show_details:

            # most recently active first
            running_monitoreds.sort(key=lambda mon: mon.last_activity or 0,
                                    reverse=True)
            now = time.time()
            width = max((len(c.name) for c in running_monitoreds), default=10)
            for index, mon in enumerate(running_monitoreds, 1):
                if mon.nb_kernels:
                    # xxx this somehow shows up UTC
                    # maybe it simply needs USE_TZ = True in the django settings
                    la = mon.last_activity_human()
                    ellapsed = int(now - mon.last_activity) // 60
                    print(
                        f"{index:<3d}{mon.name:>{width}s} [{mon.nb_kernels:>2d}k] "
                        f"last active {la} - {ellapsed:>3d} min ago")
                else:
                    display = '?' if mon.nb_kernels is None else 0
                    print(f"{index:<3d}{mon.name:>{width}s} [-{display}-] ")

        # what terminates the summary lines
        sep = "\n" if show_details else ""

        def print_line(stopped, monitoreds, msg):
            nb_stopped = len(stopped)
            if show_idle:
                # a container that could not be probed (None) counts as idle
                nb_idle = sum((mon.nb_kernels == 0 or mon.nb_kernels is None)
                              for mon in monitoreds)
                nb_active = len(monitoreds) - nb_idle
                total_kernels = sum(
                    (mon.nb_kernels or 0) for mon in monitoreds)
                total = nb_stopped + nb_idle + nb_active
                print(self.now(), f"{msg} {nb_stopped} stopped + "
                      f"({nb_idle} idle + {nb_active} active) "
                      f"= {total} containers"
                      f" with {total_kernels} kernels",
                      end=sep)
            else:
                nb_running = len(monitoreds)
                total = nb_stopped + nb_running
                print(self.now(), f"{msg} {nb_stopped} stopped + "
                      f"{nb_running} running = {total} "
                      f"containers",
                      end=sep)

        print_line(all_stopped, running_monitoreds, "ALL")
        if self.patterns:
            selected_stopped = [c for c in all_stopped if self.in_scope(c)]
            selected_running = [
                mon for mon in running_monitoreds if self.in_scope(mon)
            ]
            if self.continuous:
                print()
            print_line(selected_stopped, selected_running, "SEL")