def resources(cls, jobs_resources):
    """Clear the terminal and print a CPU/memory usage table for the given jobs.

    Args:
        jobs_resources: a single resources payload or a list of them; every
            entry is parsed with ``ContainerResourcesConfig.from_dict``.
    """
    payloads = to_list(jobs_resources)
    click.clear()

    pair_with_slash = "{} / {}"
    pair_with_dash = "{} - {}"

    # The first row is the header (see ``headers="firstrow"`` below).
    table = [["Job", "Mem Usage / Total", "CPU% - CPUs"]]
    for payload in payloads:
        config = ContainerResourcesConfig.from_dict(payload)
        job_name = config.job_name
        memory_cell = pair_with_slash.format(
            to_unit_memory(config.memory_used),
            to_unit_memory(config.memory_limit),
        )
        cpu_cell = pair_with_dash.format(
            to_percentage(config.cpu_percentage / 100),
            config.n_cpus,
        )
        table.append([job_name, memory_cell, cpu_cell])

    rendered = tabulate(table, headers="firstrow")
    click.echo(rendered)
    sys.stdout.flush()
def gpu_resources(cls, jobs_resources):
    """Clear the terminal and print a per-GPU usage table for the given jobs.

    Jobs whose parsed config reports no GPU resources are skipped. When no job
    reports any GPU (including the case of an empty input), an error is
    printed and the process exits with status 1.

    Args:
        jobs_resources: a single resources payload or a list of them; every
            entry is parsed with ``ContainerResourcesConfig.from_dict``.

    Raises:
        SystemExit: with code 1 when none of the jobs has GPU resources.
    """
    jobs_resources = to_list(jobs_resources)
    click.clear()
    # The first row is the header (see ``headers="firstrow"`` below).
    data = [[
        "job_name",
        "name",
        "GPU Usage",
        "GPU Mem Usage / Total",
        "GPU Temperature",
        "Power Draw / Limit",
    ]]
    non_gpu_jobs = 0
    for job_resources in jobs_resources:
        job_resources = ContainerResourcesConfig.from_dict(job_resources)
        if not job_resources.gpu_resources:
            non_gpu_jobs += 1
            continue
        # Emit one row per GPU so every row has exactly the six header
        # columns; concatenating all GPUs of a job into a single row would
        # produce an over-wide row for multi-GPU jobs.
        for gpu in job_resources.gpu_resources:
            data.append([
                job_resources.job_name,
                gpu.name,
                to_percentage(gpu.utilization_gpu / 100),
                "{} / {}".format(
                    to_unit_memory(gpu.memory_used),
                    to_unit_memory(gpu.memory_total),
                ),
                gpu.temperature_gpu,
                "{} / {}".format(gpu.power_draw, gpu.power_limit),
            ])
    if non_gpu_jobs == len(jobs_resources):
        Printer.print_error(
            "No GPU job was found, please run `resources` command without `-g | --gpu` option."
        )
        # ``sys.exit`` rather than the ``exit`` helper, which is injected by
        # the ``site`` module and is not guaranteed to exist.
        sys.exit(1)
    click.echo(tabulate(data, headers="firstrow"))
    sys.stdout.flush()
def humanize_attrs(key, value, rounding=2):
    """Return ``value`` formatted for display according to its attribute ``key``.

    Args:
        key: attribute name used to choose the formatter.
        value: raw attribute value.
        rounding: forwarded to ``to_percentage`` for ``cpu_percentage``.

    Returns:
        The result of ``humanize_timesince`` for timestamp keys, of
        ``to_percentage`` for ``cpu_percentage``, of ``to_unit_memory`` for
        memory keys, and ``value`` unchanged for any other key.
    """
    timestamp_keys = (
        "created_at",
        "updated_at",
        "started_at",
        "finished_at",
        "last_update_time",
        "last_transition_time",
    )
    percentage_keys = ("cpu_percentage",)
    memory_keys = ("memory_free", "memory_used", "memory_total")

    if key in timestamp_keys:
        humanized = humanize_timesince(value)
    elif key in percentage_keys:
        humanized = to_percentage(value, rounding)
    elif key in memory_keys:
        humanized = to_unit_memory(value)
    else:
        # Unknown keys are passed through untouched.
        humanized = value
    return humanized
def cluster_nodes_analytics() -> None:
    """Send a best-effort analytics ping describing the cluster's nodes.

    The URL is built from the ``CLUSTER_NOTIFICATION_NODES_URL`` template,
    filled with the values returned by ``get_cluster_resources()``, a fresh
    UUID and the configured chart version. Any ``requests`` failure is
    deliberately ignored so that analytics never break the caller.
    """
    cluster = get_cluster_resources()
    notification = uuid.uuid4()
    notification_url = conf.get(CLUSTER_NOTIFICATION_NODES_URL).format(
        url=conf.get(CLUSTER_NOTIFICATION_URL),
        cluster_uuid=cluster.uuid.hex,
        n_nodes=cluster.n_nodes,
        n_cpus=cluster.n_cpus,
        # A falsy memory value is reported as 0.
        memory=to_unit_memory(cluster.memory or 0),
        n_gpus=cluster.n_gpus,
        notification=notification,
        version=conf.get(CHART_VERSION),
    )
    try:
        # Without a timeout ``requests`` may wait forever on an unresponsive
        # endpoint; a timeout raises ``requests.Timeout``, which is a
        # ``RequestException`` and is therefore swallowed below.
        # NOTE(review): 10 seconds is an arbitrary bound -- adjust if needed.
        requests.get(notification_url, timeout=10)
    except requests.RequestException:
        # Best-effort: analytics failures are intentionally ignored.
        pass
def add_memory_unit(cls, obj_dict, keys):
    """Replace the values stored under ``keys`` with ``to_unit_memory`` output.

    Args:
        obj_dict: mapping updated in place.
        keys: a single key or a list of keys; normalised with ``to_list``.

    Returns:
        The same ``obj_dict`` object that was passed in.
    """
    # Entries are converted one at a time, in order, directly on the mapping.
    for field in to_list(keys):
        raw_value = obj_dict[field]
        obj_dict[field] = to_unit_memory(raw_value)
    return obj_dict