def get_summary_infos(project_name, jobs_stats):
    """Render a resource-usage summary table for a list of jobs.

    One row is produced per entry of ``jobs_stats``. Jobs whose state is
    ``JobState.running`` or ``JobState.serving`` get CPU, MEM, GPU and
    GPU MEM cells; every other job gets empty strings in those columns.

    Args:
        project_name: Value placed in the PROJECT column of every row.
        jobs_stats: Iterable of per-job stats objects. Each is read through
            its ``job`` attribute and its ``get`` method, which is called
            either as ``get(key)`` or as ``get(key, fmt, converter)``.

    Returns:
        Whatever ``print_table`` wrote into the in-memory buffer, as a string.
    """

    def format_cpu(job_stats):
        """Build the CPU cell as 'used/available |requested'."""
        cpu_percent = job_stats.get('cpu_percent')
        if cpu_percent is None:
            return '-'
        # Float divisor (as in format_gpu) so an integer percentage is not
        # truncated under integer division.
        used = '%.1f' % (cpu_percent / 100.0)
        requested = '%.1f' % job_stats.job.cpus
        available = '-'
        percpu_percent = job_stats.get('percpu_percent')
        if percpu_percent:
            available = '%d' % len(percpu_percent)
        return '{:>3}/{} |{}'.format(used, available, requested.rstrip('0').rstrip('.'))

    def format_mem(job_stats):
        """Build the MEM cell as 'used/available |requested'."""
        available = job_stats.get('memory_limit', '%.1f', bytes_to_gib)
        used = job_stats.get('memory_used', '%.1f', bytes_to_gib)
        requested = '%.1f' % mib_to_gib(job_stats.job.mem)
        return '{:>3}/{} |{}'.format(used, available.rstrip('0').rstrip('.'),
                                     requested.rstrip('0').rstrip('.'))

    def format_gpu(job_stats):
        """Build the GPU cell as 'used/requested', or ' -' when not applicable."""
        gpus = job_stats.job.gpus
        gpu_percent = job_stats.get('gpu_percent')
        # Compare the numeric GPU count; the formatted string never equals 0.
        if gpus == 0 or gpu_percent is None:
            return ' -'
        used = '%.1f' % (gpu_percent / float(100))
        return '{:>3}/{}'.format(used, '%d' % gpus)

    def format_gpu_mem(job_stats):
        """Build the GPU MEM cell as 'used/available', or ' -' when unknown."""
        if job_stats.get('gpu_memory_total') is None:
            return ' -'
        available = job_stats.get('gpu_memory_total', '%.1f', bytes_to_gib)
        used = job_stats.get('gpu_memory_used', '%.1f', bytes_to_gib)
        return '{:>3}/{}'.format(used, available.rstrip('0').rstrip('.'))

    rows = []
    for job_stats in jobs_stats:
        job = job_stats.job
        row = [job.short_id, project_name, '%s%s' % (get_state_symbol(job.state), job.state)]
        if job.state in (JobState.running, JobState.serving):
            row += [
                format_cpu(job_stats),
                format_mem(job_stats),
                format_gpu(job_stats),
                format_gpu_mem(job_stats),
            ]
        else:
            # No resource cells for jobs outside the running/serving states.
            row += ['', '', '', '']
        rows.append(row)

    output = StringIO()
    print_table(
        header=['ID', 'PROJECT', 'STATE', 'CPU', 'MEM', 'GPU', 'GPU MEM'],
        min_widths=[4, 8, 6, 10, 10, 3, 10],
        rows=rows,
        file=output,
        column_spaces=2)
    return output.getvalue()
def _get_experiment_rows(experiments, all=False, collapsed=True, users=False):
    """Build table rows for a sequence of experiments.

    Args:
        experiments: Iterable of experiment objects.
        all: When false, experiments whose state is 'FINISHED', 'KILLED' or
            'FAILED' are left out.
        collapsed: When false, each row gets an empty cell and a result cell
            appended, and rows for the experiment's children follow it.
        users: When true, the owner's username is inserted after the ID.

    Returns:
        A list of rows, each a list of cell values.
    """
    hidden_states = ['FINISHED', 'KILLED', 'FAILED']
    expanded = not collapsed

    table = []
    for exp in experiments:
        if not all and exp.state in hidden_states:
            continue

        row = [exp.short_id]
        if users:
            row.append(exp.user.username)
        row.extend([
            exp.project.name,
            '%s%s' % (util.get_state_symbol(exp.state), exp.state),
            util.get_since_str(exp.created_at),
            exp.type,
        ])
        if expanded:
            row.extend(['', result(exp)])
        table.append(row)

        # Child rows are listed directly beneath their parent.
        if expanded and exp.children:
            table.extend(get_experiments_rows(exp, with_user=users))
    return table
def get_experiments_rows(group, with_project=True, with_type=True, with_params=True, with_user=False,
                         indent=True, with_result=True):
    """Build one table row per child experiment of ``group``.

    Args:
        group: Object whose ``children`` are the experiments to list; its
            ``user`` and ``project`` supply the optional user/project cells.
        with_project: Include the group's project name.
        with_type: Include a type cell reading 'Experiment'.
        with_params: Include the value of ``params(experiment)``.
        with_user: Include the group's username.
        indent: Prefix the ID and type cells with a tree-branch marker.
        with_result: Include the value of ``result(experiment)``.

    Returns:
        A list of rows, each a list of cell values.
    """
    table = []
    for position, child in enumerate(group.children):
        # The last child gets the closing branch marker.
        if not indent:
            prefix = ''
        elif position < len(group.children) - 1:
            prefix = u'├╴'
        else:
            prefix = u'╰╴'

        row = [prefix + child.short_id]
        if with_user:
            row.append(group.user.username)
        if with_project:
            row.append(group.project.name)
        row.append(u'%s%s' % (util.get_state_symbol(child.state), child.state))
        row.append(util.get_since_str(child.created_at))
        if with_type:
            row.append(prefix + 'Experiment')
        if with_params:
            row.append(params(child))
        if with_result:
            row.append(result(child))
        table.append(row)
    return table
def show_job_table(jobs):
    """Print a table with one row per job via ``util.print_table``.

    Columns are job ID, state, started/finished times, reason, message,
    exit code and the job's GPU, CPU and MEM values. Messages longer than
    20 characters are cut to 17 characters followed by '...'.

    Args:
        jobs: Iterable of job objects.
    """

    def shorten(message):
        # Keep long messages within the 20-character message column.
        if message and len(message) > 20:
            return message[:17] + '...'
        return message or ''

    table = []
    for job in jobs:
        table.append([
            job.short_id,
            '%s%s' % (util.get_state_symbol(job.state), job.state),
            util.get_since_str(job.started_at),
            util.get_since_str(job.finished_at),
            job.reason or '',
            shorten(job.message),
            '' if job.exit_code is None else job.exit_code,
            '%d' % job.gpus,
            '%.1f' % job.cpus,
            '%d' % job.mem,
        ])

    util.print_table(
        header=['JOB ID', 'STATE', 'STARTED', 'FINISHED', 'REASON', 'MESSAGE', 'EXIT CODE', 'GPU', 'CPU', 'MEM'],
        min_widths=[13, 13, 13, 13, 13, 20, 10, 6, 6, 6],
        rows=table
    )
def get_detailed_info(job_stats):
    """Return a bold caption plus an indented detail section for one job.

    The detail section is:
      * 'Experiment uses no GPUs' when the job requested zero GPUs,
      * the output of ``get_gpu_table(job_stats)`` when the job state is
        'RUNNING' or 'SERVING',
      * 'No real-time stats available' otherwise.

    Args:
        job_stats: Stats object exposing the job through its ``job`` attribute.

    Returns:
        The caption and the detail section joined by a newline.
    """
    job = job_stats.job
    state_label = '%s%s' % (get_state_symbol(job.state), job.state)
    caption = bold('%s (STATE: %s)' % (job.short_id, state_label))

    # The zero-GPU check comes first, so it applies regardless of job state.
    if job.gpus == 0:
        return '\n'.join([caption, indent('Experiment uses no GPUs')])

    if job.state in ('RUNNING', 'SERVING'):
        # Only the GPU table is reported; earlier, disabled memory/CPU
        # output has been dropped.
        gpu_table = get_gpu_table(job_stats)
        return '\n'.join([caption, indent(gpu_table)])

    return '\n'.join([caption, indent('No real-time stats available')])
def show_experiment_group(group):
    """Print a summary of an experiment set, its experiments and its jobs.

    Prints the ID, type, state and project, then a Tensorboard line when the
    group's framework is 'tensorflow' and its framework config enables
    'tensorboard', then a table of the group's experiments, and finally a
    job table when the group has jobs.

    Args:
        group: The experiment set to display.
    """
    print("ID: {}".format(group.short_id))
    print("Type: Set")
    print("State: {}{}".format(util.get_state_symbol(group.state), group.state))
    print("Project: {}".format(group.project.name))

    if group.framework == 'tensorflow' and group.framework_config.get('tensorboard', False):
        # A default of None avoids StopIteration when no job has the
        # tensorboard role; `or ()` covers a falsy `group.jobs`, which the
        # check further down already allows for.
        tensorboard_job = next(
            (job for job in (group.jobs or ()) if job.role == 'tensorboard'),
            None)
        if tensorboard_job is not None and tensorboard_job.state in ['RUNNING']:
            print("Tensorboard: {}".format(util.tensorboard_job_url(tensorboard_job)))
        else:
            print("Tensorboard: OFFLINE")

    print()
    util.print_table(
        header=['EXP ID', 'STATE', 'AGE', 'PARAMS', 'RESULT'],
        min_widths=(6, 9, 13, 14, 14),
        rows=get_experiments_rows(group, with_project=False, with_type=False, indent=False)
    )

    if group.jobs:
        print()
        show_job_table(group.jobs)
def show_common_header(entity, type):
    """Print the ID, type and state lines for ``entity``, then its timing.

    NOTE(review): a later definition of ``show_common_header`` in this file
    has the same signature but does not call ``show_timing``; being later,
    it appears to replace this one. Confirm which variant is intended.

    Args:
        entity: Object providing ``short_id``, ``state``, ``started_at`` and
            ``finished_at``.
        type: Label printed on the "Type" line.
    """
    id_line = "ID: {}".format(entity.short_id)
    print(id_line)

    type_line = "Type: {}".format(type)
    print(type_line)

    symbol = util.get_state_symbol(entity.state)
    print("State: {}{}".format(symbol, entity.state))

    show_timing(entity.started_at, entity.finished_at)
def show_common_header(entity, type):
    """Print the ID, type and state lines for ``entity``.

    NOTE(review): an earlier definition of ``show_common_header`` in this
    file has the same signature and additionally prints timing; this
    definition appears to override it. Confirm which variant is intended.

    Args:
        entity: Object providing ``short_id`` and ``state``.
        type: Label printed on the "Type" line.
    """
    id_line = "ID: {}".format(entity.short_id)
    print(id_line)

    type_line = "Type: {}".format(type)
    print(type_line)

    symbol = util.get_state_symbol(entity.state)
    print("State: {}{}".format(symbol, entity.state))