Example #1
import nvgpu


def gpu_usage():
    # Query the first GPU once instead of calling nvgpu.gpu_info() per field.
    info = nvgpu.gpu_info()[0]
    header = '\n====GPU USAGE====\n'
    gpu_type = 'Type: ' + str(info['type']) + '\n'
    mem_used = 'Memory Used: ' + str(info['mem_used']) + '\n'
    mem_total = 'Memory Total: ' + str(info['mem_total']) + '\n'
    mem_percent = 'Memory Percentage: ' + str(int(info['mem_used_percent'])) + '%\n'
    return header + gpu_type + mem_used + mem_total + mem_percent
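A quick call sketch, assuming an NVIDIA driver and nvidia-smi are present on the machine (nvgpu.available_gpus() is the same helper used in Examples #22 and #28):

import nvgpu

if nvgpu.available_gpus():   # list of index strings for GPUs that are mostly free
    print(gpu_usage())
else:
    print('All GPUs are busy')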
Example #2
import nvgpu


def check_availability():
    """Return True if nvidia-smi is present and GPU info can be queried."""
    try:
        nvgpu.gpu_info()
        available = True
    except FileNotFoundError as exp:
        # A missing nvidia-smi binary means there is no usable NVIDIA GPU.
        if exp.filename == 'nvidia-smi':
            available = False
        else:
            return False

    return available
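A minimal guard sketch around the helpers above (gpu_usage() from Example #1 is assumed to be importable in the same module):

if check_availability():
    print(gpu_usage())
else:
    print('No NVIDIA GPU detected; skipping the GPU report.')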
Example #3
import json
from shutil import which  # assumed source of `which`; the original import is not shown

import nvgpu


def get_gpu_info():
    if which('nvidia-smi') is not None:
        return json.dumps(nvgpu.gpu_info())
    return json.dumps(
        ["Couldn't retrieve GPU information - probably not an NVIDIA card."])
Example #4
def _allocate_gpu(num_gpus):
    current_user = pwd.getpwuid(os.getuid()).pw_name
    gpu_info = nvgpu.gpu_info()
    device_info = device_statuses()

    # assume nothing is available
    completely_available = [False for _ in gpu_info]
    same_user_available = [False for _ in gpu_info]

    for i, (_info, _device) in enumerate(zip(gpu_info, device_info)):
        completely_available[i] = _device['is_available']
        unique_current_users = list(set(_device['users']))

        # if there's space on the gpu...
        if _info['mem_used_percent'] < mem_threshold:
            # ...and you're on this gpu...
            if current_user in unique_current_users:
                #...and you're the only one on this gpu...
                if len(unique_current_users) == 1:
                    # then allocate the gpu.
                    same_user_available[i] = True

    available_gpus = same_user_available
    if sum(same_user_available) == 0:
        available_gpus = completely_available

    available_gpus = [i for i, val in enumerate(available_gpus) if val]

    return available_gpus[:num_gpus]
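A hedged usage sketch for _allocate_gpu: mem_threshold and device_statuses() are assumed to be defined elsewhere in the same module, and the selected indices are exported so CUDA only sees the chosen devices.

import os

chosen = _allocate_gpu(num_gpus=1)
if chosen:
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(i) for i in chosen)
else:
    raise RuntimeError('No suitable GPU found')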
Example #5
    def _attempt_switch(self):
        now = time.time()
        if now - self.t >= self.threshold:
            gpus_usage = gpu_info()

            # Selecting the GPU with minimum memory usage
            if len(self.gpu_ids) > 1 and random.random() > 0.5:
                min_memory = min(
                    [gpus_usage[i]['mem_used_percent'] for i in self.gpu_ids])
                gpu_indices = list(
                    filter(
                        lambda el: gpus_usage[el]['mem_used_percent'] ==
                        min_memory, self.gpu_ids))

                if self.curr_gpu not in gpu_indices:
                    self.curr_gpu = random.choice(gpu_indices)
                    torch.cuda.set_device(self.curr_gpu)

            # Switching to GPU
            used = gpus_usage[self.curr_gpu]['mem_used_percent'] / 100
            if used < self.usage_threshold and random.random() > used / self.usage_threshold:
                self.device = self._switch(GPU)
            else:
                self.t_threshold = random.uniform(10, 30)
                self.t = time.time()
Example #6
def QueryResourceUsages(self):
    """Return CPU, RAM, swap and GPU memory usage.

    Inputs:
    None

    Outputs:
    res (dict) - keys 'cpu_avg', 'cpu_max', 'ram', 'swp' plus one
    'gpu_<i>_ram' entry per detected GPU; all values are percentages.
    """
    res = {}
    # `avgs` is a module/class-level sample count; each sample spans 0.2 s.
    samples = [psutil.cpu_percent(0.2) for _ in range(avgs)]
    res['cpu_avg'] = np.mean(samples)
    res['cpu_max'] = np.max(samples)

    # Memory usage (percent of RAM and swap in use)
    res['ram'] = psutil.virtual_memory().percent
    res['swp'] = psutil.swap_memory().percent

    # GPU memory usage
    gpu_stats = nvgpu.gpu_info()
    for i, stats in enumerate(gpu_stats):
        res['gpu_' + str(i) + '_ram'] = stats['mem_used_percent']

    return res
Example #7
def info(device=None, memsort=False):
    if not check_availability():
        return []

    try:
        if isinstance(device, str):
            return next(x for x in nvgpu.gpu_info() if x['uuid'] == device)
        if isinstance(device, int):
            return next(x for x in nvgpu.gpu_info()
                        if int(x['index']) == device)
    except StopIteration:
        raise Exception(f'Failed to find device: {device}')

    devices = nvgpu.gpu_info()
    if memsort:
        devices.sort(key=lambda x: x['mem_total'] - x['mem_used'],
                     reverse=True)
    return devices
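A usage sketch for info(): look a device up by integer index or by UUID string, or list all devices with the freest first (the UUID value shown is a placeholder):

all_devices = info(memsort=True)   # devices sorted by free memory, descending
first_gpu = info(0)                # lookup by integer index
# by_uuid = info('GPU-<uuid>')     # lookup by UUID string (placeholder value)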
Example #8
def info():
    data = {
        'Architecture': '',
        'CPUopmodes': '',
        'ByteOrder': '',
        'CPUs': 0,
        'OnlineCPUslist': '',
        'Threadspercore': 0,
        'Corespersocket': 0,
        'Sockets': 0,
        'NUMAnodes': 0,
        'VendorID': '',
        'CPUfamily': 0,
        'Model': 0,
        'Modelname': '',
        'Stepping': 0,
        'CPUMHz': 0,
        'CPUmaxMHz': 0,
        'CPUminMHz': 0,
        'BogoMIPS': 0,
        'Virtualization': '',
        'L1dcache': '',
        'L1icache': '',
        'L2cache': '',
        'L3cache': '',
        'NUMAnode0CPUs': '',
        'System': platform.system()
    }
    if data['System'] == 'Windows':
        data['CPUmaxMHz'] = 0
        data['CPUs'] = multiprocessing.cpu_count()
        data['Threadspercore'] = 0
    elif data['System'] == 'Linux':
        cpu_raw = (subprocess.check_output("lscpu", shell=True)).strip().decode()\
            .replace(' ', '').replace('(', '').replace(')', '').replace('-', '').split('\n')
        cpu_data = {}
        for x in cpu_raw:
            kv = x.split(':')
            if '.' in kv[1]:
                kv[1] = float(kv[1])
            else:
                try:
                    kv[1] = int(kv[1])
                except ValueError:
                    pass
            cpu_data[kv[0]] = kv[1]
        del cpu_data['Flags']
        data.update(cpu_data)

    gpu = nvgpu.gpu_info()
    data['GPUcount'] = len(gpu)
    gpu_type = []
    for i in range(len(gpu)):
        gpu_type.append(gpu[i]['type'])
    data['GPUtype'] = gpu_type
    return data
Example #9
def get_gpu_memory_usage_percentage(mode=0):
    """May fail on Windows; this method does no error checking."""
    if mode == 0:
        return np.nan
    elif mode == 1:
        # Sum of per-GPU used percentages (can exceed 100 on multi-GPU hosts).
        return sum(info['mem_used_percent'] for info in nvgpu.gpu_info())
    elif mode == 2:
        return float(get_gpu_memory_usage(mode=2)) / float(
            get_total_gpu_memory(mode=2))
    else:
        raise ValueError("invalid mode, mode = enum(0,1,2)")
Example #10
 def get_data(self) -> Mapping[str, float]:
     try:
         info = nvgpu.gpu_info()
     except FileNotFoundError:
         return {}
     data = dict()
     M = 1024**2  # nvgpu reports memory in MiB; convert to bytes
     for entry in info:
         mem_free = (entry['mem_total'] - entry['mem_used']) * M
         data[f'gpu{entry["index"]}-mem-free'] = mem_free
     return data
Example #11
def get_free_gpu() -> int:
    # A GPU counts as free when it is using less than 150 MiB of memory.
    for index, info in enumerate(nvgpu.gpu_info()):
        if info["mem_used"] < 150:
            return index
    raise NoGpuAvailable("No free gpu available.")
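A usage sketch, assuming PyTorch is installed and NoGpuAvailable is defined in the same module:

import torch

try:
    device = torch.device(f"cuda:{get_free_gpu()}")
except NoGpuAvailable:
    device = torch.device("cpu")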
Example #12
def get_available_gpus():
    # Despite the plural name, this returns the single device with the most
    # free memory, as (index, free_mem_in_MiB).
    gpu_info = nvgpu.gpu_info()
    gpu_device = "-1"
    gpu_mem = 0
    for device in gpu_info:
        available_mem = device['mem_total'] - device['mem_used']
        if available_mem > gpu_mem:
            gpu_mem = available_mem
            gpu_device = device['index']

    return gpu_device, gpu_mem
Example #13
def validate_model_on_gpu():
    # A quick/crude way of checking whether the model is loaded on the GPU.
    # Assumptions:
    # 1. GPUs on the test setup are only utilized by torchserve
    # 2. Models are successfully unregistered between subsequent calls
    model_loaded = False
    for info in nvgpu.gpu_info():
        if info["mem_used"] > 0 and info["mem_used_percent"] > 0.0:
            model_loaded = True
            break
    return model_loaded
Example #14
def get_gpu_memory_usage(mode=0):
    """May fail on Windows; this method does no error checking."""
    if mode == 0:
        return np.nan
    elif mode == 1:
        # Total memory used across all GPUs, in MiB, as reported by nvgpu.
        return sum(info['mem_used'] for info in nvgpu.gpu_info())
    elif mode == 2:
        # Query nvidia-smi directly and parse the per-GPU "memory.used" column.
        command = "nvidia-smi --query-gpu=memory.used --format=csv"
        output = sp.check_output(command.split()).decode('ascii')
        memory_used_info = output.split('\n')[:-1][1:]  # drop the CSV header row
        memory_used_values = [int(x.split()[0]) for x in memory_used_info]
        return np.sum(memory_used_values)
    else:
        raise ValueError("invalid mode, mode = enum(0,1,2)")
Example #15
File: train.py, Project: skasai5296/VSE
def train(epoch, loader, model, optimizer, lossfunc, vocab, args):
    begin = time.time()
    maxit = int(len(loader.dataset) / args.batch_size)
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    metrics = {}
    for it, data in enumerate(loader):
        if torch.cuda.device_count() > 0 and it == 1:
            print(nvgpu.gpu_info())
        """image, target, index, img_id"""
        image = data["image"]
        caption = data["caption"]
        caption = [i[np.random.randint(0, len(i))] for i in caption]
        target = vocab.return_idx(caption)
        lengths = target.ne(vocab.padidx).sum(dim=1)

        optimizer.zero_grad()

        image = image.to(device)
        target = target.to(device)

        # im_emb, cap_emb = model(image, target, lengths)
        im_emb, cap_emb, gen, rec = model(image, target, lengths)
        # lossval = lossfunc(im_emb, cap_emb)
        lossval, lossdict = lossfunc(im_emb, cap_emb, gen, rec, target[:, 1:])
        lossval.backward()
        if not metrics:
            metrics = lossdict
        else:
            for k, v in lossdict.items():
                metrics[k] += v
        # clip gradient norm
        if args.grad_clip > 0:
            clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        if it % args.log_every == args.log_every - 1:
            lossstr = " | ".join(
                [f"{k}: {v/args.log_every:.05f}" for k, v in metrics.items()])
            print(
                f"epoch {epoch} | {sec2str(time.time()-begin)} | {it+1:06d}/{maxit:06d} iterations "
                f"| {lossstr}",
                flush=True,
            )
            metrics = {}
Example #16
def ckech_available_gpu(num_gpu=1, min_mem_mb=8000):
    gpu_info = nvgpu.gpu_info()
    key_list = gpu_info[0].keys()

    gpu_info_dict = {}
    for key in key_list:
        gpu_info_dict[key] = []

    for elem in gpu_info:
        for key in key_list:
            gpu_info_dict[key].append(elem[key])

    gpu_info_df = pd.DataFrame(gpu_info_dict)
    gpu_info_df["mem_free"] = gpu_info_df["mem_total"] * (
        1 - gpu_info_df["mem_used_percent"] / 100)

    available_list = gpu_info_df.loc[
        gpu_info_df["mem_free"] > min_mem_mb].sort_values(
            "mem_used")["index"].values

    return available_list[:num_gpu]
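A usage sketch for the function above (its name is kept as spelled in the source; nvgpu reports memory in MiB and the 'index' field as a string):

import os

ids = ckech_available_gpu(num_gpu=2, min_mem_mb=8000)
if len(ids) > 0:
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, ids))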
Example #17
def _allocate_gpu(num_gpus):
    current_user = pwd.getpwuid(os.getuid()).pw_name
    gpu_info = nvgpu.gpu_info()
    device_info = device_statuses()

    # assume nothing is available
    completely_available = [False for _ in gpu_info]
    same_user_available = [False for _ in gpu_info]

    for i, (_info, _device) in enumerate(zip(gpu_info, device_info)):
        completely_available[i] = _device['is_available']
        if (_info['mem_used_percent'] < mem_threshold
                and current_user in _device['users']):
            same_user_available[i] = True

    available_gpus = same_user_available
    if sum(same_user_available) == 0:
        available_gpus = completely_available

    available_gpus = [i for i, val in enumerate(available_gpus) if val]

    return available_gpus[:num_gpus]
Example #18
    final_shape = [1, 128, 512, 512]
    for phase in range(7, 8):
        tf.reset_default_graph()
        shape = [1, 1] + list(np.array(base_shape)[1:] * 2**(phase - 1))
        real_image_input = tf.random.normal(shape=shape)

        train_gen, train_disc = main('surfgan', final_shape, real_image_input,
                                     latent_dim, base_dim, phase)

        init_op = tf.global_variables_initializer()

        with tf.Session() as sess:
            sess.graph.finalize()
            sess.run(init_op)
            start = time.time()

            print("Phase ", phase)
            sess.run([train_gen, train_disc])
            # Print memory info.
            try:
                print(nvgpu.gpu_info())
            except subprocess.CalledProcessError:
                pid = os.getpid()
                py = psutil.Process(pid)
                print(f"CPU Percent: {py.cpu_percent()}")
                print(f"Memory info: {py.memory_info()}")

            end = time.time()

            print(f"{end - start} seconds")
Example #19
from src.Common import parse_cmd_args
from src.datasets.sample_datasets.W2VDataset import W2VDatasetClass
from src.models.sample_models.W2V import W2V

import nvgpu
import numpy as np

########################################################################################################################

args = parse_cmd_args()

city = "gijon".lower().replace(" ", "") if args.ct is None else args.ct
stage = 0 if args.stg is None else args.stg

gpu = int(
    np.argmin(list(map(lambda x: x["mem_used_percent"], nvgpu.gpu_info()))))
seed = 100 if args.sd is None else args.sd
l_rate = 5e-4 if args.lr is None else args.lr
n_epochs = 4000 if args.ep is None else args.ep
b_size = 1024 if args.bs is None else args.bs

# W2V ##################################################################################################################

base_path = "/media/nas/pperez/data/TripAdvisor/"
w2v_dts = W2VDatasetClass({
    "cities": ["gijon", "barcelona", "madrid"],
    "city": "multi",
    "remove_accents": True,
    "remove_numbers": True,
    "seed": seed,
    "data_path": base_path,
Example #20
def get_gpu_mem_usage(gpu_id=0):
    gpu_infos = nvgpu.gpu_info()
    gpu_info = gpu_infos[gpu_id]
    gpu_mem_used_pct = gpu_info['mem_used_percent']

    return gpu_mem_used_pct
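A small polling sketch built on get_gpu_mem_usage (the 50 % threshold and 30 s interval are illustrative):

import time

while get_gpu_mem_usage(gpu_id=0) > 50:  # wait until GPU 0 is at most half used
    time.sleep(30)
print("GPU 0 has enough free memory; launching the job")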
Example #21
File: Main.py, Project: pablo-pnunez/ELVis
    parser.add_argument('-e', type=int, help="Epochs")
    parser.add_argument('-c', type=str, help="Ciudad", )

    ret_args = parser.parse_args()

    return ret_args


########################################################################################################################


args = cmd_read_args()

stage = "test" if args.stage is None else args.stage

gpu = np.argmin(list(map(lambda x: x["mem_used_percent"], nvgpu.gpu_info()))) if args.gpu is None else args.gpu

lrates = [5e-4] if args.lr is None else args.lr
dpouts = [0.2] if args.d is None else args.d
nimg = "10+10" if args.nimg is None else args.nimg
epochs = 100 if args.e is None else args.e
seed = 100 if args.s is None else args.s
city = "gijon" if args.c is None else args.c

os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

config = {
        "neg_images": nimg,
        "learning_rate": lrates[0],
        "dropout": dpouts[0],
        "epochs": epochs,
Example #22
logging.basicConfig(level=logging.DEBUG,
                    format='%(levelname)s: %(asctime)s %(filename)s'
                    ' [%(funcName)s:%(lineno)d][%(process)d] %(message)s',
                    datefmt='%m-%d %H:%M:%S',
                    filename=None,
                    filemode='a')

if __name__ == "__main__":
    from argparse import ArgumentParser
    try:
        arg_parser = ArgumentParser(
            description="print available_gpu id, using nvgpu")
        arg_parser.add_argument("-b",
                                "--best",
                                default=None,
                                type=int,
                                help="output best N")
        args = arg_parser.parse_args()

        if args.best is not None:
            gpus = sorted(nvgpu.gpu_info(),
                          key=lambda x: (x['mem_used'], x['index']))
            ids = [x['index'] for x in gpus]
            print(','.join(ids[:args.best]))
        else:
            print(','.join(nvgpu.available_gpus()))

    except Exception as e:
        traceback.print_exc()
        exit(-1)
Example #23
plt.colorbar()
plt.subplot(1, 3, 2)
plt.imshow(np.transpose(fg_spec))
plt.title('Original Birdsong Track')
plt.gca().invert_yaxis()
plt.colorbar()
plt.subplot(1, 3, 3)
plt.imshow(np.transpose(fg_spec - mix_spec))
plt.title('Difference')
plt.gca().invert_yaxis()
plt.colorbar()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Show GPU memory before and after clearing the CUDA cache.
print(nvgpu.gpu_info())

torch.cuda.empty_cache()

print(nvgpu.gpu_info())

# define optimization parameters: number of epochs, batch size, and learning rate
num_epochs = 800
learning_rate = 1e-4

# define model, loss, and optimizer
model = convEncoder(device).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0)

# make lists to store info
Example #24
def update():
    time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    system_stats["cpu"].append([time_str,
                                psutil.cpu_percent()])  # this gives an average
    system_stats["memory"].append(psutil.virtual_memory().percent)
    system_stats["gpu"].append(nvgpu.gpu_info())
Example #25
import nvgpu

gpu_info = nvgpu.gpu_info()

model_loaded = False

for info in gpu_info:
    if info['mem_used'] > 0 and info['mem_used_percent'] > 0.0:
        model_loaded = True
        break

if not model_loaded:
    exit(1)
Example #26
def io(memory_args):

    heart = time.time()
    heartbeat_interval = 60 * 10  # 10 minutes

    memory_capacity = memory_args["capacity"]
    memory_alpha = memory_args["alpha"]
    memory_beta = memory_args["beta"]
    replay_size_before_sampling = memory_args["replay_size_before_sampling"]
    batch_in_queue_limit = memory_args["batch_in_queue_limit"]
    batch_size = memory_args["batch_size"]
    learner_io_queue = memory_args["learner_io_queue"]
    io_learner_queue = memory_args["io_learner_queue"]
    actor_io_queue = memory_args["actor_io_queue"]

    # Logging of priority distributions
    should_log = memory_args["log_priority_dist"]
    if should_log and could_import_tb:
        start_time = memory_args["start_time"]
        tb_write_dir = "runs/{}/IO/".format(start_time)
        tb_write_frequency = memory_args["log_write_frequency"]
        tb_priority_sample_max = memory_args["log_priority_sample_max"]
        tb_priority_sample_interval_size = memory_args[
            "log_priority_sample_interval_size"]
        samples_actor = np.zeros(
            int(tb_priority_sample_max / tb_priority_sample_interval_size))
        samples_learner = np.zeros(
            int(tb_priority_sample_max / tb_priority_sample_interval_size))

        tb = SummaryWriter(tb_write_dir)
        tb_nvidia_log_freq = 10  #seconds

    replay_memory = PrioritizedReplayMemory(memory_capacity, memory_alpha)

    log_count_actor = 0
    log_count_learner = 0
    count_gen_trans = 0
    count_cons_trans = 0
    count_total_gen_trans = 0
    count_total_cons_trans = 0
    start_learning = False
    total_amout_transitions = 0
    nvidia_log_time = time.time()
    stop_watch = time.time()
    while (True):

        # empty the queue of transitions coming from the actors
        while not actor_io_queue.empty():

            transitions = actor_io_queue.get()
            for i in range(len(transitions)):
                t, p = transitions[i]
                replay_memory.save(t, p)
                total_amout_transitions += 1

                # log distribution
                if should_log:
                    count_gen_trans += 1
                    samples_actor[min(
                        int(p / tb_priority_sample_interval_size),
                        len(samples_actor) - 1)] += 1

            # append logged priorities from actor to file
            #log_count_actor += 1
            #if should_log and could_import_tb and log_count_actor >= tb_write_frequency:
            #    log_count_actor  = 0
            #    count_total_gen_trans  += count_gen_trans
            #    count_total_cons_trans += count_cons_trans
            #    t = time.time()
            #    tb.add_histogram("Distribution/Actor Distribution", samples_actor)
            #    tb.add_scalars("Data/", {"Total Consumption":count_total_cons_trans, "Total Generation":count_total_gen_trans})
            #    tb.add_scalars("Data/", {"Consumption per Second":count_cons_trans/(t-stop_watch), "Generation per Second":count_gen_trans/(t-stop_watch)})
            #    stop_watch = time.time()
            #    count_gen_trans  = 0
            #    count_cons_trans = 0
            #    samples_actor    = np.zeros(int(tb_priority_sample_max/tb_priority_sample_interval_size))

            if should_log and nvidia_log_time + tb_nvidia_log_freq < time.time():
                nvidia_log_time = time.time()
                gpu_info = nvgpu.gpu_info()
                for i in gpu_info:

                    gpu = '{} {}'.format(i['type'], i['index'])
                    mem_total = i['mem_total']
                    mem_used = i['mem_used']
                    tb.add_scalars(gpu, {
                        'mem_total': mem_total,
                        'mem_used': mem_used
                    })

                count_total_gen_trans += count_gen_trans
                count_total_cons_trans += count_cons_trans
                t = time.time()
                tb.add_scalars(
                    "Data/total/", {
                        "Total Consumption": count_total_cons_trans,
                        "Total Generation": count_total_gen_trans
                    })
                tb.add_scalars(
                    "Data/speed/", {
                        "Consumption per Second":
                        count_cons_trans / (t - stop_watch),
                        "Generation per Second":
                        count_gen_trans / (t - stop_watch)
                    })
                tb.add_histogram("Distribution/Actor Distribution",
                                 samples_actor)
                tb.add_histogram("Distribution/Learner Distribution",
                                 samples_learner)

                print("Consuption per Second ",
                      count_cons_trans / (t - stop_watch))
                print("Generation per Second ",
                      count_gen_trans / (t - stop_watch))
                stop_watch = time.time()
                samples_actor = np.zeros(
                    int(tb_priority_sample_max /
                        tb_priority_sample_interval_size))
                samples_learner = np.zeros(
                    int(tb_priority_sample_max /
                        tb_priority_sample_interval_size))
                count_gen_trans = 0
                count_cons_trans = 0

        # Sample transitions until `batch_in_queue_limit` batches are queued for the learner
        if (not start_learning
            ) and replay_memory.filled_size() >= replay_size_before_sampling:
            start_learning = True

        while (start_learning
               and io_learner_queue.qsize() < batch_in_queue_limit):
            transitions, weights, indices, priorities = replay_memory.sample(
                batch_size, memory_beta)
            data = (transitions, weights, indices)
            io_learner_queue.put(data)

            # log distribution
            if should_log:
                count_cons_trans += batch_size
                samples_learner[np.minimum(
                    (np.array(priorities) /
                     tb_priority_sample_interval_size).astype(int),
                    len(samples_actor) - 1)] += 1

            # append logger priorities going to actor to file
            # log_count_learner += 1
            # if should_log and could_import_tb and log_count_learner >= tb_write_frequency:
            #     log_count_learner = 0
            #     tb.add_histogram("Distribution/Learner Distribution", samples_actor)
            #     samples_learner = np.zeros(int(tb_priority_sample_max/tb_priority_sample_interval_size))

        # empty queue from learner
        terminate = False
        while not learner_io_queue.empty():

            msg, item = learner_io_queue.get()

            if msg == "priorities":
                # Update priorities
                indices, priorities = item
                replay_memory.priority_update(indices, priorities)
            elif msg == "terminate":
                if should_log and could_import_tb:
                    tb.close()
                print("Total amount of generated transitions: ",
                      total_amout_transitions)

        if time.time() - heart > heartbeat_interval:
            heart = time.time()
            tb.add_scalar("Heartbeat/IO", 1)
Example #27
def get_gpu_info():
    return nvgpu.gpu_info()[0]
Example #28
import nvgpu

logging.basicConfig(
    level=logging.DEBUG,
    format='%(levelname)s: %(asctime)s %(filename)s'
    ' [%(funcName)s:%(lineno)d][%(process)d] %(message)s',
    datefmt='%m-%d %H:%M:%S',
    filename=None,
    filemode='a')

if __name__ == "__main__":
    from argparse import ArgumentParser
    try:
        arg_parser = ArgumentParser(
            description="print available_gpu id, using nvgpu")
        arg_parser.add_argument(
            "-b", "--best", default=None, type=int, help="output best N")
        args = arg_parser.parse_args()

        if args.best is not None:
            gpus = sorted(
                nvgpu.gpu_info(), key=lambda x: (x['mem_used'], x['index']))
            ids = [x['index'] for x in gpus]
            print(','.join(ids[:args.best]))
        else:
            print(','.join(nvgpu.available_gpus()))

    except Exception as e:
        traceback.print_exc()
        exit(-1)
Example #29
def get_sys_details():
    """ Get every owns of device information and returns a dict """

    try:
        cpu_info = {}
        os_info = {}
        network_info = {}
        system_info = {}
        battery_info = {}
        disc_partition_info = {}
        partition_storage = {}
        gpu_list = nvgpu.gpu_info()
        # Guard against machines without an NVIDIA GPU (empty list).
        gpu_info = gpu_list[0] if gpu_list else {}

        os_info['platform'] = platform.system()
        os_info['platform-release'] = platform.release()
        os_info[
            'Operating System'] = f'{platform.system()} {platform.release()}'
        os_info['platform-version'] = platform.version()
        system_info['Architecture'] = platform.machine()
        network_info['hostname'] = socket.gethostname()
        network_info['IP-address'] = socket.gethostbyname(socket.gethostname())
        system_info['mac-address'] = ':'.join(
            re.findall('..', '%012x' % uuid.getnode()))
        cpu_info['processor'] = platform.processor()
        cpu_info['Cores'] = psutil.cpu_count()
        cpu_info['Frequency - Current (MHz)'] = psutil.cpu_freq().current
        cpu_info['Frequency - Minimum (MHz)'] = psutil.cpu_freq().min
        cpu_info['Frequency - Maximum (MHz)'] = psutil.cpu_freq().max
        """ Battery Status """
        battry_obj = psutil.sensors_battery()
        battery_info['Percent'] = battry_obj.percent

        if battry_obj.secsleft != psutil.POWER_TIME_UNLIMITED and battry_obj.secsleft != psutil.POWER_TIME_UNKNOWN:
            total_seconds = battry_obj.secsleft
            hours = total_seconds // 3600
            total_seconds %= 3600
            minutes = total_seconds // 60
            battery_info[
                'Time Left'] = f'{hours} Hours, {minutes} Minutes (approx)'
        elif battry_obj.secsleft == psutil.POWER_TIME_UNLIMITED:
            battery_info['Time Left'] = 'N/A -- Plugged In'
        elif battry_obj.secsleft == psutil.POWER_TIME_UNKNOWN:
            battery_info['Time Left'] = 'N/A -- Battery Not Available'
        else:
            battery_info['Time Left'] = 'N/A -- Unknown'
        battery_info['Power Plugged'] = battry_obj.power_plugged
        """ Memory (RAM) """
        ram_info = bytes_to_gigs(psutil.virtual_memory())
        """ Disc """
        partitions_list = psutil.disk_partitions()
        for partitions in partitions_list:
            temp = []
            for each in partitions:
                temp.append(each)
            disc_partition_info[partitions.device] = temp
            partition_storage[partitions.device] = bytes_to_gigs(
                psutil.disk_usage(partitions.device))

        dic_list = [
            network_info, os_info, system_info, cpu_info, gpu_info,
            battery_info, ram_info, disc_partition_info, partition_storage
        ]
        dic_label = 'Network info,OS Info,System Info,CPU Info,GPU Info,Battery Info,RAM Info,Disc Info,Partition Info'.split(
            ',')

        return {label: val for label, val in zip(dic_label, dic_list)}

    except Exception:
        # On any failure (e.g. no battery sensor or no NVIDIA GPU) return None.
        return None
Example #30
 def gpu_stats(self) -> pd.DataFrame:
     x = pd.DataFrame.from_dict(nvgpu.gpu_info())
     x["machine"] = 0
     x["mem_free"] = x["mem_total"] - x["mem_used"]
     x["unique_gpu"] = x.apply(lambda y: (y["machine"], y["index"]), axis=1)
     return x
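A usage sketch for gpu_stats (`monitor` stands in for whatever object defines the method; it picks the GPU with the most free memory):

stats = monitor.gpu_stats()
best = stats.sort_values("mem_free", ascending=False).iloc[0]
print(best["unique_gpu"], best["mem_free"], "MiB free")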