def smart_login(self, user=None, host="romeo", node=1, gpus=1):
    """
    Log in on a node of ``host``, optionally choosing one with free GPUs.

    The queue status is fetched and printed first. When ``node`` is
    ``None``, ``"first"`` or ``"random"``, a node is selected from the
    candidates that are not reserved and whose used GPU count plus
    ``gpus`` does not exceed the per-node maximum. Any other value of
    ``node`` is passed to ``self.login`` unchanged.

    :param user: user name forwarded to ``queue``, ``reserved_nodes``
                 and ``login``
    :param host: host group; ``"volta"`` expands to ``r-00[5-6]``,
                 every other value to ``r-00[3-4]``
    :param node: explicit node, or ``None``/``"first"`` for the first
                 candidate, or ``"random"`` for a random candidate
    :param gpus: number of GPUs requested
    :return: None; an error is reported through ``Console.error`` when
             no node could be selected
    """
    status = self.queue(host=host, user=user)
    print(Printer.attribute(status, header=["Node", "Used GPUs"]))

    #
    # Required node not available (down, drained or reserved)
    #
    reserved = self.reserved_nodes(user=user)

    def hostnames(host):
        """Return unreserved nodes of ``host`` that can take ``gpus`` more GPUs."""
        if host == "volta":
            names = Parameter.expand("r-00[5-6]")
        else:
            names = Parameter.expand("r-00[3-4]")
        max_gpus = 8  # this is for now hard coded
        return [
            name for name in names
            if name not in reserved and status[name] + gpus <= max_gpus
        ]

    def find_random(host):
        """Return a random candidate node, or None when there is none."""
        names = hostnames(host)
        if not names:
            return None
        # Choose among the candidate nodes themselves; the index must not
        # be derived from the length of the host name string.
        return random.choice(names)

    def find_first(host):
        """Return the first candidate node, or None when there is none."""
        names = hostnames(host)
        if not names:
            return None
        return names[0]

    # The helpers are only invoked on demand so that an explicitly given
    # node skips the availability lookup entirely.
    if node is None or node == "first":
        node = find_first(host)
    if node is None or node == "random":
        node = find_random(host)

    if node is not None:
        Console.ok(f"Login on node {host}: {node}")
        self.login(user=user, host=host, node=node, gpus=gpus)
    else:
        Console.error(f"not enough GPUs available: {host}: {node}")
def benchmark(cls, sysinfo=True, csv=True, prefix="# csv", tag=None, sum=True):
    """
    Print all timers in a convenient benchmark table.

    The platform information is collected first and, if requested,
    printed as an attribute table. Afterwards one row per timer is
    printed as a table and optionally as CSV.

    :param sysinfo: if True, print the platform attribute table
    :param csv: if True, additionally print the timers in CSV format
    :param prefix: value written into an extra leading ``"# csv"`` column
                   of the CSV output; if None no such column is added
    :param tag: value stored in the ``tag`` column of every timer
                (empty string when not given)
    :param sum: not used by this method
    :return: None
    """
    #
    # PRINT PLATFORM
    #
    print()
    data_platform = systeminfo()
    data_platform['cpu_count'] = multiprocessing.cpu_count()

    if sysinfo:
        print(Printer.attribute(
            data_platform,
            order=["Machine Attribute", "Value"],
            output="table"
        ))

    #
    # PRINT TIMERS
    #
    timers = StopWatch.keys()

    if len(timers) > 0:

        platform_attributes = [
            "uname.node",
            "user",
            "uname.system",
            "uname.machine",
            "platform.version",
            "sys.platform",
        ]

        data_timers = {}
        for timer in timers:
            data_timers[timer] = {
                'start': time.strftime("%Y-%m-%d %H:%M:%S",
                                       time.gmtime(
                                           StopWatch.timer_start[timer])),
                'time': StopWatch.get(timer, digits=3),
                'sum': StopWatch.sum(timer, digits=3),
                'status': StopWatch.get_status(timer),
                'timer': timer,
                'tag': tag or ''
            }
            # Copy selected platform values into every timer row.
            for attribute in platform_attributes:
                data_timers[timer][attribute] = data_platform[attribute]

        # The internal start/stop timer is not reported.
        data_timers.pop('benchmark_start_stop', None)

        # Translate the raw status into a readable label; other status
        # values are left untouched.
        for entry in data_timers.values():
            status = entry['status']
            if status is None:
                entry['status'] = "failed"
            elif status == True:  # noqa: E712
                entry['status'] = "ok"

        order = [
            "timer",
            "status",
            "time",
            "sum",
            "start",
            "tag",
            "uname.node",
            "user",
            "uname.system",
            "platform.version"
        ]

        header = [
            "Name",
            "Status",
            "Time",
            "Sum",
            "Start",
            "tag",
            "Node",
            "User",
            "OS",
            "Version"
        ]

        print()
        print(Printer.write(
            data_timers,
            order=order,
            header=header,
            output="table"
        ))
        print()

        if csv:
            if prefix is not None:
                for entry in data_timers:
                    data_timers[entry]["# csv"] = prefix

                # Keep header and order the same length so that every
                # column, including the added prefix column, has a title.
                order = ["# csv"] + order
                header = ["# csv"] + header

                print(Printer.write(
                    data_timers,
                    order=order,
                    header=header,
                    output="csv"
                ))
            else:
                # NOTE(review): this branch dumps the raw data and omits
                # the first column ("timer") from the CSV; kept as is,
                # confirm whether that is intended.
                pprint(data_timers)

                print(Printer.write(
                    data_timers,
                    order=order[1:],
                    output="csv"
                ))

    else:

        print("ERROR: No timers found")
def config(self, config):
    """
    Print the given configuration as an attribute listing.

    :param config: the configuration data handed to ``Printer.attribute``
    :return: None
    """
    Console.info("config")
    rendered = Printer.attribute(config)
    print(rendered)