def show_experiment_group(group):
    """Print a summary of an experiment group.

    Shows the common header, the Tensorboard status (when the group is a
    tensorflow group with tensorboard enabled), the experiments table and,
    if any jobs exist, the job table.
    """
    show_common_header(group, 'Set')
    if group.framework == 'tensorflow' and group.framework_config.get(
            'tensorboard', False):
        # Use next() with a default: the original raised an unhandled
        # StopIteration when the config flag was set but no tensorboard
        # job actually existed in the group.
        tensorboard_job = next(
            (job for job in group.jobs if job.role == 'tensorboard'), None)
        if tensorboard_job is not None and tensorboard_job.state in ['RUNNING']:
            print("Tensorboard: {}".format(
                util.tensorboard_job_url(tensorboard_job)))
        else:
            print("Tensorboard: OFFLINE")
    print()
    util.print_table(
        header=['EXP ID', 'STATE', 'STARTED', 'DURATION', 'PARAMS', 'RESULT'],
        min_widths=(6, 9, 11, 9, 14, 14),
        rows=get_experiments_rows(group, with_project=False, with_type=False,
                                  indent=False, use_started_at=True,
                                  with_duration=True))
    if group.jobs:
        print()
        show_job_table(group.jobs)
def display_gpus(nodes):
    """Print a per-device GPU table (one row per GPU) for every schedulable
    node; the hostname and driver version are shown only on a node's first
    row."""
    nodes = filter(lambda n: n.schedulable, nodes)

    def get_device_id(device_name):
        # '/dev/nvidia3' -> 3. Fall back to -1 for unrecognized device
        # names: the original returned None, which made the sorted() key
        # comparison raise TypeError on Python 3.
        if device_name.startswith('/dev/nvidia'):
            return int(device_name[len('/dev/nvidia'):])
        return -1

    rows = []
    for n in nodes:
        gpus = 0
        # consistent reporting of GPUs: only count them when all devices
        # are allocatable.
        if n.gpus_allocatable == len(n.gpus):
            gpus = len(n.gpus)
        if not gpus:
            continue
        sorted_gpus = sorted(n.gpus, key=lambda x: get_device_id(x.device))
        for i, gpu in enumerate(sorted_gpus):
            rows.append([
                n.hostname if i == 0 else '',
                n.nvidia_driver if i == 0 else '',
                gpu.name,
                get_device_id(gpu.device),
                '%.1f' % bytes_to_gib(gpu.mem),
                gpu.serial
            ])
    print_table(
        header=['NODE', 'DRIVER', 'NAME', 'ID', 'MEM', 'SERIAL'],
        min_widths=[2, 3, 3, 2, 3, 3],
        rows=rows,
        column_spaces=2,
    )
def get_summary_infos(project_name, jobs_stats):
    """Build a per-job resource-usage summary table and return it as a string.

    Running/serving jobs get CPU/MEM/GPU/GPU-MEM columns; all other jobs
    show only id, project and state.
    """

    def format_cpu(job_stats):
        # "used/available |requested" in cores; '-' when no sample exists.
        if job_stats.get('cpu_percent') is None:
            return '-'
        used = '%.1f' % (job_stats.get('cpu_percent') / 100)
        requested = '%.1f' % job_stats.job.cpus
        available = '-'
        if job_stats.get('percpu_percent'):
            available = '%d' % len(job_stats.get('percpu_percent'))
        return '{:>3}/{} |{}'.format(used, available,
                                     requested.rstrip('0').rstrip('.'))

    # NOTE: the helpers below previously declared their parameter as
    # `jobs_stats` while reading `job_stats`; they only worked by
    # resolving the enclosing loop variable through the closure. The
    # parameter is now named correctly so each helper uses its argument.
    def format_mem(job_stats):
        available = job_stats.get('memory_limit', '%.1f', bytes_to_gib)
        used = job_stats.get('memory_used', '%.1f', bytes_to_gib)
        requested = '%.1f' % mib_to_gib(job_stats.job.mem)
        return '{:>3}/{} |{}'.format(used,
                                     available.rstrip('0').rstrip('.'),
                                     requested.rstrip('0').rstrip('.'))

    def format_gpu(job_stats):
        # Compare the numeric GPU count: the original tested
        # `('%d' % gpus) == 0`, which is always False, so the
        # zero-GPU branch was unreachable.
        if job_stats.job.gpus == 0 or job_stats.get('gpu_percent') is None:
            return ' -'
        used = '%.1f' % (job_stats.get('gpu_percent') / float(100))
        return '{:>3}/{}'.format(used, '%d' % job_stats.job.gpus)

    def format_gpu_mem(job_stats):
        if job_stats.get('gpu_memory_total') is None:
            return ' -'
        available = job_stats.get('gpu_memory_total', '%.1f', bytes_to_gib)
        used = job_stats.get('gpu_memory_used', '%.1f', bytes_to_gib)
        return '{:>3}/{}'.format(used, available.rstrip('0').rstrip('.'))

    rows = []
    output = StringIO()
    for job_stats in jobs_stats:
        job = job_stats.job
        if job.state in (JobState.running, JobState.serving):
            rows.append([
                job.short_id, project_name,
                '%s%s' % (get_state_symbol(job.state), job.state),
                format_cpu(job_stats),
                format_mem(job_stats),
                format_gpu(job_stats),
                format_gpu_mem(job_stats)
            ])
        else:
            # Non-running jobs have no live stats; pad the stat columns.
            rows.append([job.short_id, project_name,
                         '%s%s' % (get_state_symbol(job.state), job.state)] +
                        ['', '', '', ''])
    print_table(
        header=['ID', 'PROJECT', 'STATE', 'CPU', 'MEM', 'GPU', 'GPU MEM'],
        min_widths=[4, 8, 6, 10, 10, 3, 10],
        rows=rows,
        file=output,
        column_spaces=2)
    return output.getvalue()
def show_experiments(experiments, all=False, collapsed=True, users=False):
    """Print the experiments table; columns and widths depend on the
    collapsed/users display flags."""
    columns, column_widths = _get_status_headers(collapsed, users)
    table_rows = _get_experiment_rows(experiments, all, collapsed, users)
    util.print_table(header=columns,
                     min_widths=column_widths,
                     rows=table_rows)
def run_list(args):
    """Fetch all users via the admin API and print them as a table
    (username, email, enabled flag)."""
    client = AdminApi(ApiClient())
    users = call_api(lambda: client.get_users())
    rows = [[user.username, user.email, str(user.is_enabled)]
            for user in users]
    print_table(header=['Username', 'Email', 'Enabled'],
                min_widths=[12, 6, 9],
                column_spaces=2,
                rows=rows)
def display_long(nodes):
    """Print the long node listing (CPU, memory, GPUs, driver and component
    versions) for schedulable nodes, followed by a totals row."""
    rows = []
    total_cpus = 0
    total_mem = 0
    total_gpus = 0
    total_gpu_mem = 0
    for node in filter(lambda n: n.schedulable, nodes):
        # consistent reporting of GPUs: only count them when every device
        # on the node is allocatable.
        if node.gpus_allocatable == len(node.gpus):
            gpus = len(node.gpus)
            gpu_mem = sum(gpu.mem for gpu in node.gpus)
        else:
            gpus = 0
            gpu_mem = 0
        total_cpus += node.cpus
        total_mem += node.mem
        total_gpus += gpus
        total_gpu_mem += gpu_mem
        rows.append([
            node.hostname,
            node.cpus,
            format_float(bytes_to_gib(node.mem)),
            gpus,
            format_float(bytes_to_gib(gpu_mem)),
            '-' if node.nvidia_driver == 'NOT FOUND' else node.nvidia_driver,
            node.kubelet_version.lstrip('v'),
            node.docker_version
        ])
    rows.append(TableRowDelimiter('-'))
    rows.append([
        'Total',
        total_cpus,
        format_float(bytes_to_gib(total_mem)),
        total_gpus,
        format_float(bytes_to_gib(total_gpu_mem)),
        '', '', ''
    ])
    print_table(
        header=[
            'NODE', 'CPU', 'MEM', 'GPU', 'GPU MEM', 'NVIDIA DRIVER',
            'KUBELET VERSION', 'DOCKER VERSION'
        ],
        min_widths=[18, 3, 3, 3, 7, 3, 3, 3],
        rows=rows,
        column_spaces=2,
    )
def show_job_table(jobs):
    """Print one row per job: id, state, timing, failure details and
    requested resources."""
    rows = []
    for job in jobs:
        if job.message and len(job.message) > 20:
            # Clip long messages to 17 chars plus an ellipsis (20 total).
            message = job.message[:17] + '...'
        else:
            message = job.message or ''
        rows.append([
            job.short_id,
            '%s%s' % (util.get_state_symbol(job.state), job.state),
            util.get_since_str(job.started_at),
            util.get_since_str(job.finished_at),
            job.reason or '',
            message,
            job.exit_code if job.exit_code is not None else '',
            '%d' % job.gpus,
            '%.1f' % job.cpus,
            '%d' % job.mem
        ])
    util.print_table(
        header=['JOB ID', 'STATE', 'STARTED', 'FINISHED', 'REASON',
                'MESSAGE', 'EXIT CODE', 'GPU', 'CPU', 'MEM'],
        min_widths=[13, 13, 13, 13, 13, 20, 10, 6, 6, 6],
        rows=rows
    )
def get_gpu_table(job_stats):
    """Return a formatted per-GPU stats table for a job as a string.

    GPUs that were requested but have no device assigned yet show up as
    'N/A' placeholder rows. Returns '' when there are no rows at all.
    """

    def fmt_mem(stats):
        # "used/total" in GiB, or ' -' when no total was sampled.
        if stats.get('memory_total') is None:
            return ' -'
        total = stats.get('memory_total', '%.1f', bytes_to_gib)
        used = stats.get('memory_used', '%.1f', bytes_to_gib)
        return '{:>3}/{}'.format(used, total.rstrip('0').rstrip('.'))

    def fmt_power(stats):
        # "draw/limitW", or ' -' when the power limit is unknown.
        if stats.get('power_limit') is None:
            return ' -'
        limit = stats.get('power_limit', '%d')
        draw = stats.get('power_draw', '%d')
        return '{:>3}/{}W'.format(draw, limit)

    rows = []
    output = StringIO()
    for index, device in enumerate(job_stats.gpus):
        stats = job_stats.gpu_stats[device]
        rows.append([
            index,
            stats.get('name', '%s'),
            stats.get('gpu_utilization', '%d%%'),
            fmt_mem(stats),
            fmt_power(stats),
            stats.get('temperature', '%dC'),
            stats.get('device_bus_id', '%s')
        ])
    # Pad with placeholders for requested-but-unassigned GPUs.
    missing = job_stats.job.gpus - len(job_stats.gpus)
    for _ in range(missing):
        rows.append(['N/A'] + [''] * 6)
    if rows:
        print_table(
            header=['ID', 'NAME', 'UTIL', 'MEM', 'POWER', 'TEMP', 'BUS ID'],
            min_widths=[3, 8, 4, 6, 3, 3, 3],
            rows=rows,
            bold_header=False,
            column_spaces=2,
            file=output)
    return output.getvalue().strip()
def display_short(nodes):
    """Print the short node listing (CPU, memory, GPUs) for schedulable
    nodes, followed by a totals row."""
    rows = []
    total_cpus = 0
    total_mem = 0
    total_gpus = 0
    total_gpu_mem = 0
    for node in filter(lambda n: n.schedulable, nodes):
        # consistent reporting of GPUs: only count them when every device
        # on the node is allocatable.
        if node.gpus_allocatable == len(node.gpus):
            gpus = len(node.gpus)
            gpu_mem = sum(gpu.mem for gpu in node.gpus)
        else:
            gpus = 0
            gpu_mem = 0
        total_cpus += node.cpus
        total_mem += node.mem
        total_gpus += gpus
        total_gpu_mem += gpu_mem
        rows.append([
            node.hostname,
            node.cpus,
            '%.1f' % bytes_to_gib(node.mem),
            gpus,
            format_float(bytes_to_gib(gpu_mem))
        ])
    rows.append(TableRowDelimiter('-'))
    rows.append([
        'Total',
        total_cpus,
        format_float(bytes_to_gib(total_mem)),
        total_gpus,
        '%.1f' % bytes_to_gib(total_gpu_mem)
    ])
    print_table(
        header=['NODE', 'CPU', 'MEM', 'GPU', 'GPU MEM'],
        min_widths=[18, 3, 3, 3, 7],
        rows=rows,
        column_spaces=2,
    )
def show_experiment_group(group):
    """Print a detail view of an experiment group: id/type/state/project
    header, optional Tensorboard status, the experiments table and, if any
    jobs exist, the job table."""
    print("ID: {}".format(group.short_id))
    print("Type: Set")
    print("State: {}{}".format(util.get_state_symbol(group.state),
                               group.state))
    print("Project: {}".format(group.project.name))
    if group.framework == 'tensorflow' and group.framework_config.get(
            'tensorboard', False):
        # Use next() with a default: the original raised an unhandled
        # StopIteration when the config flag was set but no tensorboard
        # job actually existed in the group.
        tensorboard_job = next(
            (job for job in group.jobs if job.role == 'tensorboard'), None)
        if tensorboard_job is not None and tensorboard_job.state in ['RUNNING']:
            print("Tensorboard: {}".format(
                util.tensorboard_job_url(tensorboard_job)))
        else:
            print("Tensorboard: OFFLINE")
    print()
    util.print_table(
        header=['EXP ID', 'STATE', 'AGE', 'PARAMS', 'RESULT'],
        min_widths=(6, 9, 13, 14, 14),
        rows=get_experiments_rows(group, with_project=False, with_type=False,
                                  indent=False)
    )
    if group.jobs:
        print()
        show_job_table(group.jobs)