def gpu_resources(cls, jobs_resources):
    """Clear the terminal and print a table of GPU usage for the given jobs.

    Each entry of ``jobs_resources`` is parsed with
    ``ContainerResourcesConfig.from_dict``. Jobs that report no GPU resources
    are skipped and counted. Every GPU of a job is rendered as its own row so
    that the cells always line up with the six header columns.

    Args:
        jobs_resources: one resources payload or a collection of payloads;
            normalized with ``to_list`` before use.

    Raises:
        SystemExit: with status 1, after printing an error, when the number
            of jobs without GPU resources equals the number of jobs (which
            also holds when the normalized list is empty).
    """
    jobs_resources = to_list(jobs_resources)
    click.clear()
    data = [[
        'job_name',
        'name',
        'GPU Usage',
        'GPU Mem Usage / Total',
        'GPU Temperature',
        'Power Draw / Limit',
    ]]
    non_gpu_jobs = 0
    for job_resources in jobs_resources:
        job_resources = ContainerResourcesConfig.from_dict(job_resources)
        if not job_resources.gpu_resources:
            non_gpu_jobs += 1
            continue
        for gpu_resources in job_resources.gpu_resources:
            # One row per GPU: concatenating several GPUs into a single row
            # would yield more cells than there are headers.
            data.append([
                job_resources.job_name,
                gpu_resources.name,
                to_percentage(gpu_resources.utilization_gpu / 100),
                '{} / {}'.format(
                    to_unit_memory(gpu_resources.memory_used),
                    to_unit_memory(gpu_resources.memory_total)),
                gpu_resources.temperature_gpu,
                '{} / {}'.format(gpu_resources.power_draw, gpu_resources.power_limit),
            ])
    if non_gpu_jobs == len(jobs_resources):
        Printer.print_error(
            'No GPU job was found, please run `resources` command without `-g | --gpu` option.'
        )
        # sys.exit rather than the bare `exit` helper, which is injected by
        # the `site` module and is not guaranteed to exist in every runtime.
        sys.exit(1)
    click.echo(tabulate(data, headers="firstrow"))
    sys.stdout.flush()
def resources(cls, jobs_resources):
    """Clear the terminal and print a memory/CPU usage table for the jobs.

    Args:
        jobs_resources: one resources payload or a collection of payloads;
            normalized with ``to_list`` and parsed entry by entry with
            ``ContainerResourcesConfig.from_dict``.
    """
    raw_entries = to_list(jobs_resources)
    click.clear()

    def _as_row(raw_entry):
        # Build the three cells (name, memory, cpu) for a single job.
        config = ContainerResourcesConfig.from_dict(raw_entry)
        memory_cell = '{} / {}'.format(
            to_unit_memory(config.memory_used),
            to_unit_memory(config.memory_limit))
        cpu_cell = '{} - {}'.format(
            to_percentage(config.cpu_percentage / 100),
            config.n_cpus)
        return [config.job_name, memory_cell, cpu_cell]

    table = [['Job', 'Mem Usage / Total', 'CPU% - CPUs']]
    table.extend([_as_row(raw_entry) for raw_entry in raw_entries])
    click.echo(tabulate(table, headers="firstrow"))
    sys.stdout.flush()
def humanize_attrs(cls, obj):
    """Return a dict of display-friendly values for selected attributes of ``obj``.

    The attribute names come from three collections on ``cls``, processed in
    this order (a name listed in more than one keeps the last conversion):

    * ``DATETIME_ATTRIBUTES`` -> ``humanize_timesince(value)``
    * ``PERCENT_ATTRIBUTES``  -> ``to_percentage(value, cls.ROUNDING)``
    * ``MEM_SIZE_ATTRIBUTES`` -> ``to_unit_memory(value)``

    Args:
        obj: object the attribute values are read from with ``getattr``.

    Returns:
        A new dict mapping attribute name to its converted value.
    """
    result = {
        name: humanize_timesince(getattr(obj, name))
        for name in cls.DATETIME_ATTRIBUTES
    }
    result.update(
        (name, to_percentage(getattr(obj, name), cls.ROUNDING))
        for name in cls.PERCENT_ATTRIBUTES
    )
    result.update(
        (name, to_unit_memory(getattr(obj, name)))
        for name in cls.MEM_SIZE_ATTRIBUTES
    )
    return result
def cluster_nodes_analytics():
    """Send a best-effort analytics ping describing the cluster's resources.

    Reads the cluster via ``get_cluster_resources()``, fills the
    ``settings.POLYAXON_NOTIFICATION_CLUSTER_NODES_URL`` template with the
    cluster uuid, node/CPU/GPU counts, memory (formatted by
    ``to_unit_memory``, with a missing value treated as 0), a fresh random
    notification id and the chart version, then issues a GET request to it.

    Any ``requests.RequestException`` (including a timeout) is swallowed, so
    a failed ping never propagates to the caller.
    """
    # `requests` applies no timeout unless asked to; without one an
    # unresponsive endpoint would block this call indefinitely.
    request_timeout = 5  # seconds

    cluster = get_cluster_resources()
    notification = uuid.uuid4()
    notification_url = settings.POLYAXON_NOTIFICATION_CLUSTER_NODES_URL.format(
        url=settings.CLUSTER_NOTIFICATION_URL,
        cluster_uuid=cluster.uuid.hex,
        n_nodes=cluster.n_nodes,
        n_cpus=cluster.n_cpus,
        memory=to_unit_memory(cluster.memory or 0),
        n_gpus=cluster.n_gpus,
        notification=notification,
        version=settings.CHART_VERSION)
    try:
        requests.get(notification_url, timeout=request_timeout)
    except requests.RequestException:
        # Deliberately ignored: the notification is optional telemetry.
        pass
def add_memory_unit(cls, obj_dict, keys):
    """Replace the values under ``keys`` with their ``to_unit_memory`` form.

    Args:
        obj_dict: mapping to update; it is modified in place.
        keys: one key or a collection of keys (normalized with ``to_list``).
            Each key must already be present in ``obj_dict``.

    Returns:
        The same ``obj_dict`` object, after the update.
    """
    for field in to_list(keys):
        raw_value = obj_dict[field]
        obj_dict[field] = to_unit_memory(raw_value)
    return obj_dict