def smart_login(self, user=None, host="romeo", node=1, gpus=1):
        """
        Log in on a node of ``host`` that can still provide ``gpus`` GPUs.

        The GPU usage reported by ``self.queue`` is printed first. If
        ``node`` is None, "first" or "random", a node is selected from the
        candidates of the host: names that are not in
        ``self.reserved_nodes`` and whose used GPUs plus ``gpus`` do not
        exceed the per-node limit. Any other value of ``node`` is handed to
        ``self.login`` unchanged. If no node could be selected, an error is
        reported through ``Console.error`` and no login is attempted.

        :param user: user name forwarded to queue, reserved_nodes and login
        :param host: host group; "volta" uses the names expanded from
                     r-00[5-6], every other value those from r-00[3-4]
        :param node: an explicit node, None or "first" for the first
                     suitable node, "random" for a randomly chosen one
        :param gpus: number of GPUs requested
        :return: None
        """
        status = self.queue(host=host, user=user)

        print(Printer.attribute(status, header=["Node", "Used GPUs"]))

        #
        # Required node not available (down, drained or reserved)
        #
        reserved = self.reserved_nodes(user=user)

        max_gpus = 8  # this is for now hard coded

        def candidates():
            """Return the nodes of the host with enough free GPUs."""
            pattern = "r-00[5-6]" if host == "volta" else "r-00[3-4]"
            # NOTE(review): status[name] raises KeyError for a node missing
            # from the queue status; confirm that queue() lists every node.
            return [
                name
                for name in Parameter.expand(pattern)
                if name not in reserved and status[name] + gpus <= max_gpus
            ]

        if node is None or node == "first":
            names = candidates()
            node = names[0] if names else None
        elif node == "random":
            names = candidates()
            # Choose among the candidates themselves; indexing with a value
            # derived from len(host) could run past the end of the list.
            node = random.choice(names) if names else None

        if node is not None:
            Console.ok(f"Login on node {host}: {node}")

            self.login(user=user, host=host, node=node, gpus=gpus)
        else:
            Console.error(f"not enough GPUs available: {host}: {node}")
Example #2
0
    def benchmark(cls,
                  sysinfo=True,
                  csv=True,
                  prefix="# csv",
                  tag=None,
                  sum=True):
        """
        Print the platform information and all timers as a benchmark table.

        :param sysinfo: if True, print the platform attributes as a table
        :param csv: if True, also print the timers in csv format
        :param prefix: value stored under the "# csv" key of every timer and
                       placed first in the csv column order; if None, the
                       timer data is pretty-printed and the csv is written
                       without that column
        :param tag: value for the "tag" field of every timer; '' if unset
        :param sum: not used by this method
        :return: None
        """

        #
        # PRINT PLATFORM
        #

        print()
        data_platform = systeminfo()

        data_platform['cpu_count'] = multiprocessing.cpu_count()

        if sysinfo:
            print(Printer.attribute(
                data_platform,
                order=["Machine Attribute", "Value"],
                output="table"
            ))

        #
        # PRINT TIMERS
        #
        timers = StopWatch.keys()

        if len(timers) == 0:
            print("ERROR: No timers found")
            return

        # The platform fields are the same for every timer, so look them up
        # once instead of once per timer.
        platform_fields = {
            attribute: data_platform[attribute]
            for attribute in ["uname.node",
                              "user",
                              "uname.system",
                              "uname.machine",
                              "platform.version",
                              "sys.platform"]
        }

        data_timers = {}
        for timer in timers:
            data_timers[timer] = {
                'start': time.strftime("%Y-%m-%d %H:%M:%S",
                                       time.gmtime(
                                           StopWatch.timer_start[timer])),
                'time': StopWatch.get(timer, digits=3),
                'sum': StopWatch.sum(timer, digits=3),
                'status': StopWatch.get_status(timer),
                'timer': timer,
                'tag': tag or ''
            }
            data_timers[timer].update(platform_fields)

        # The timer around the benchmark itself is not reported.
        data_timers.pop('benchmark_start_stop', None)

        # Translate the raw status into a readable label; any other status
        # value is reported unchanged.
        for entry in data_timers.values():
            if entry['status'] is None:
                entry['status'] = "failed"
            elif entry['status'] == True:  # noqa: E712 - equality kept on purpose
                entry['status'] = "ok"

        order = [
            "timer",
            "status",
            "time",
            "sum",
            "start",
            "tag",
            "uname.node",
            "user",
            "uname.system",
            "platform.version"
        ]

        header = [
            "Name",
            "Status",
            "Time",
            "Sum",
            "Start",
            "tag",
            "Node",
            "User",
            "OS",
            "Version"
        ]

        print()
        print(Printer.write(
            data_timers,
            order=order,
            header=header,
            output="table"

        ))
        print()

        if csv:
            if prefix is not None:
                for entry in data_timers:
                    data_timers[entry]["# csv"] = prefix

                order = ["# csv"] + order

                # NOTE(review): header has one entry fewer than order here;
                # confirm how Printer.write treats header for csv output.
                print(Printer.write(
                    data_timers,
                    order=order,
                    header=header,
                    output="csv"
                ))
            else:

                # NOTE(review): looks like leftover debug output - confirm.
                pprint(data_timers)

                # NOTE(review): order[1:] drops the "timer" column although
                # no prefix column was added in this branch - confirm intent.
                print(Printer.write(
                    data_timers,
                    order=order[1:],
                    output="csv"
                ))
Example #3
0
 def config(self, config):
     """
     Announce the config step and print the given configuration.

     :param config: the data handed to Printer.attribute for printing
     :return: None
     """
     Console.info("config")
     rendered = Printer.attribute(config)
     print(rendered)