Example No. 1
    def train(self, epoch):
        self.model.train()
        print("Epoch {0}/{1}".format(epoch, self.n_epochs))
        t = tqdm(self.train_loader)
        loss_avg = 0.0
        n = 1
        
        for param_group in self.optimizer.param_groups:
            current_lr = param_group['lr']

        for batch_idx, (stokes, phys) in enumerate(t):
            stokes = stokes.to(self.device)
            phys = phys.to(self.device)
            
            self.optimizer.zero_grad()
            
            out_phys = self.model(stokes)

            # Loss
            loss = torch.mean(self.weights[None,:]*(out_phys-phys)**2)
                                                        
            loss.backward()

            self.optimizer.step()

            loss_avg = self.smooth * loss.item() + (1.0 - self.smooth) * loss_avg
            
            tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)

            t.set_postfix(loss=loss_avg, lr=current_lr, gpu=tmp.gpu, mem=tmp.memory)
            
        self.loss.append(loss_avg)
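
This trainer reads self.handle, and later examples test an NVIDIA_SMI flag, without showing where either comes from. A minimal setup sketch, assuming the same nvidia_smi bindings used throughout these examples (the TrainerBase name and the gpu argument are illustrative only):

import nvidia_smi

try:
    nvidia_smi.nvmlInit()
    NVIDIA_SMI = True
except Exception:
    # NVML not available (e.g. no NVIDIA driver); fall back to CPU-only reporting
    NVIDIA_SMI = False


class TrainerBase:
    def __init__(self, gpu=0):
        self.device = f"cuda:{gpu}"
        # Handle for the GPU whose utilization the tqdm postfix reports
        self.handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu) if NVIDIA_SMI else None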
Example No. 2
def check_gpu_stat():
    nvidia_smi.nvmlInit()
    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    for i in range(deviceCount):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        print(f'gpu{i}: {res.gpu}%, gpu-mem: {res.memory}%')
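
A possible call site for the helper above; the nvmlShutdown() call is an assumption here, mirroring the cleanup done in Examples No. 21 and No. 31:

import nvidia_smi

if __name__ == "__main__":
    check_gpu_stat()           # prints per-GPU utilization and memory activity
    nvidia_smi.nvmlShutdown()  # release NVML once monitoring is done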
Example No. 3
    def test(self):
        self.model.eval()

        loss_L2_avg = 0.0

        n = 1
        t = tqdm(self.test_loader)

        for param_group in self.optimizer.param_groups:
            current_lr = param_group['lr']

        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(t):

                if self.cuda:
                    data, target = data.to(self.device), target.to(self.device)

                output = self.model(data)

                # sum up batch loss
                loss_L2 = self.lossfn_L2(output, target)

                loss_L2_avg += (loss_L2.item() - loss_L2_avg) / n
                n += 1

                self.loss_L2_val.append(loss_L2_avg)

                if self.cuda:
                    tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
                    t.set_postfix(loss=loss_L2_avg,
                                  lr=current_lr,
                                  gpu=tmp.gpu,
                                  mem=tmp.memory)
                else:
                    t.set_postfix(loss=loss_L2_avg, lr=current_lr)
Example No. 4
    def train(self, epoch):
        """
        Train for one epoch
        """

        # Set model in training mode
        self.model.train()

        print("Epoch {0}/{1}".format(epoch, self.n_epochs))
        t = tqdm(self.train_loader)
        loss_avg = 0.0

        # Get current learning rate
        for param_group in self.optimizer.param_groups:
            current_lr = param_group['lr']

        for batch_idx, (images, images_ft, variance) in enumerate(t):

            # Move all data to GPU/CPU
            images, images_ft, variance = images.to(self.device), images_ft.to(
                self.device), variance.to(self.device)

            # Zero the gradients in the optimizer
            self.optimizer.zero_grad()

            # Evaluate the model
            coeff, numerator, denominator, psf, psf_ft, loss = self.model(
                images, images_ft, variance)

            # Backpropagate
            loss.backward()

            if (batch_idx == 0):
                loss_avg = loss.item()
            else:
                loss_avg = self.smooth * loss.item() + (1.0 -
                                                        self.smooth) * loss_avg

            # Update the weights according to the optimizer
            self.optimizer.step()

            # Get GPU usage for printing
            gpu_usage = ''
            memory_usage = ''
            if (NVIDIA_SMI):
                tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
                gpu_usage = gpu_usage + f' {tmp.gpu}'
                memory_usage = memory_usage + f' {tmp.memory}'

                t.set_postfix(loss=loss.item(),
                              loss_avg=loss_avg,
                              lr=current_lr,
                              gpu=gpu_usage,
                              mem=memory_usage)
            else:
                t.set_postfix(loss=loss.item(),
                              loss_avg=loss_avg,
                              lr=current_lr)

        self.loss.append(loss_avg)
Example No. 5
    def train(self, epoch):
        self.model.train()
        print("Epoch {0}/{1}".format(epoch, self.n_epochs))
        t = tqdm(self.train_loader)
        loss_avg = 0.0
        n = 1
        
        for param_group in self.optimizer.param_groups:
            current_lr = param_group['lr']

        for batch_idx, (inputs, outputs) in enumerate(t):
            inputs = inputs.to(self.device)
            outputs = outputs.to(self.device)
            
            self.optimizer.zero_grad()
            out = self.model(inputs)
            
            # Loss
            loss = self.loss_fn(out, outputs)
                    
            loss.backward()

            self.optimizer.step()

            loss_avg = self.smooth * loss.item() + (1.0 - self.smooth) * loss_avg

            if (NVIDIA_SMI):
                tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
                t.set_postfix(loss=loss_avg, lr=current_lr, gpu=tmp.gpu, mem=tmp.memory)
            else:
                t.set_postfix(loss=loss_avg, lr=current_lr)
            
        self.loss.append(loss_avg)
Example No. 6
    def stats(self, n_iter):
        if self.eps is not None:
            self.log.add_scalar('eps', self.eps(), n_iter)

        if self.handle is not None:
            res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
            self.log.add_scalar('nvidia/load', res.gpu, n_iter)
            res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.handle)
            self.log.add_scalar('nvidia/mem_gb', res.used / (1024**3), n_iter)
Example No. 7
 def get():
     handles = []
     output = []
     for device_id in range(nvidia_smi.nvmlDeviceGetCount()):
         handles.append(nvidia_smi.nvmlDeviceGetHandleByIndex(device_id))
     for handle in handles:
         res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
         output.append({'usage': res.gpu, 'memory': res.memory})
     return output
Example No. 8
 def Available_GPUs(self):
     available = []
     for i in range(self.total_gpus):
         handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
         res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
         mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
         if res.gpu < 30 and (mem_res.used / mem_res.total * 100) < 30:
             available.append(i)
     return available
Example No. 9
 def get_usage(gpu_list=None, **kwargs):
     """ Track GPU memory utilization. """
     _ = kwargs
     gpu_list = gpu_list or [0]
     nvidia_smi.nvmlInit()
     handle = [nvidia_smi.nvmlDeviceGetHandleByIndex(i) for i in gpu_list]
     res = [
         nvidia_smi.nvmlDeviceGetUtilizationRates(item) for item in handle
     ]
     return [item.memory for item in res]
Example No. 10
def available_GPUs(total_gpus):
    available_gpus = []
    for i in range(total_gpus):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        if res.gpu < 30 and (
                mem_res.used / mem_res.total * 100
        ) < 30:  # Jon heuristically defines what it means for a GPU to be available
            available_gpus.append(i)
    return available_gpus
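
One illustrative way to consume the returned list (not part of the original example) is to pin the process to the first device the heuristic considers idle:

import os
import nvidia_smi

nvidia_smi.nvmlInit()
idle = available_GPUs(nvidia_smi.nvmlDeviceGetCount())
if idle:
    # Must run before any CUDA context is created in this process
    os.environ["CUDA_VISIBLE_DEVICES"] = str(idle[0])
nvidia_smi.nvmlShutdown()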
Example No. 11
    def on_train_batch_begin(self, batch, logs=None):

        nvidia_smi.nvmlInit()
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
        # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate

        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        res1 = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        #GPUs = GPU.getGPUs()
        #gpu = GPUs[0]

        print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
Example No. 12
    def output(self, data_dict, n_iter):
        if self.handle is not None:
            res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
            self.log.add_scalar('nvidia/load', res.gpu, n_iter)
            res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.handle)
            self.log.add_scalar(
                'nvidia/mem_gb', res.used / (1024 ** 3), n_iter)

        for key, val in data_dict.items():
            if hasattr(val, 'shape') and np.prod(val.shape) > 1:
                self.log.add_histogram(key, val, n_iter)
            else:
                self.log.add_scalar(key, val, n_iter)
Example No. 13
def run(data_loader, engine):
    batch_time = AverageMeter()
    gpu = AverageMeter()
    gpu_mem = AverageMeter()
    # Allocate buffers and create a CUDA stream.
    h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
    # Contexts are used to perform inference.
    input = torch.rand((args.batch_size, ) + ModelData.INPUT_SHAPE)
    # if data_loader != 0:

    with engine.create_execution_context() as context:
        end = time.time()
        # for i in range(args.loop):
        for i, (input, target) in enumerate(data_loader):
            if i == args.loop:
                break
            np.copyto(h_input, input.reshape(-1))

            do_inference(context, h_input, d_input, h_output, d_output, stream)
            batch_time.update(time.time() - end)
            end = time.time()
            # https://pypi.org/project/py3nvml/
            util_rate = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            # print(util_rate.gpu, util_rate.memory)
            gpu.update(util_rate.gpu)
            mem_info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            gpu_mem.update(mem_info.used >> 20)

            print("predict:", h_output)
            print("predict: ", h_output.shape)
            print("target:", target)
            print("target:", target.shape)

            if i % args.print_freq == 0 and not args.csv:
                print(
                    '[{}/{}] batch time {batch_time.val:.3f} s (avg:{batch_time.avg:.3f})'
                    .format(i, args.loop, batch_time=batch_time))
    # print summary
    if args.csv:
        print("{}, {:.3f}, {:.3f}, {:.3f}, {}".format(
            args.batch_size, args.loop * args.batch_size / batch_time.sum,
            batch_time.avg, gpu.avg, gpu_mem.avg))
    else:
        print("batchsize: {} ".format(args.batch_size))
        print("throughput: {:.3f} img/sec".format(args.loop * args.batch_size /
                                                  batch_time.sum))
        print("Latency: {:.3f} sec".format(batch_time.avg))
        # see https://forums.fast.ai/t/show-gpu-utilization-metrics-inside-training-loop-without-subprocess-call/26594
        # show GPU utilization metrics inside the training loop
        print("GPU util: {:.3f} %, GPU mem: {} MiB".format(
            gpu.avg, gpu_mem.avg))
Example No. 14
    def train(self, epoch):
        self.model.train()
        print("Epoch {0}/{1}".format(epoch, self.n_epochs))
        t = tqdm(self.train_loader)
        loss_avg = 0.0
        n = 1

        for param_group in self.optimizer.param_groups:
            current_lr = param_group['lr']

        for batch_idx, (Phi_split, surface, clouds, rho,
                        d_split) in enumerate(t):
            Phi_split, surface, clouds, rho, d_split = Phi_split.to(
                self.device), surface.to(self.device), clouds.to(
                    self.device), rho.to(self.device), d_split.to(self.device)

            self.optimizer.zero_grad()

            surf, clouds, out_surface, out_clouds = self.model(d_split,
                                                               self.surf0,
                                                               self.clouds0,
                                                               Phi_split,
                                                               rho,
                                                               n_epochs=5)

            # Loss
            loss = 0.0
            for i in range(self.K):
                loss += self.loss_fn(out_surface[i], surface)
                # loss += self.loss_fn(out_clouds[i], clouds)

            loss.backward()

            self.optimizer.step()

            if (batch_idx == 0):
                loss_avg = loss.item()
            else:
                loss_avg = self.smooth * loss.item() + (1.0 -
                                                        self.smooth) * loss_avg

            if (NVIDIA_SMI):
                tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
                t.set_postfix(loss=loss_avg,
                              lr=current_lr,
                              gpu=tmp.gpu,
                              mem=tmp.memory)
            else:
                t.set_postfix(loss=loss_avg, lr=current_lr)

        self.loss.append(loss_avg)
Example No. 15
 def cal_gpu_util(job):
     ct = 0
     gpu = 0
     nvidia_smi.nvmlInit()
     for key in job.gpus_loc.keys():
         for i in job.gpus_loc[key]:
             ct += 1
             handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
             res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
             gpu += res.gpu
     if ct > 0:
         avg = gpu / ct
         return avg
     else:
         print('job has no GPU')
         return 0
Example No. 16
def get_SystemStats(process, NVIDIA_GPU):
    if NVIDIA_GPU:
        deviceCount = nvidia_smi.nvmlDeviceGetCount()
        gpu_memory = []
        gpu_utilization = []
        for i in range(0, deviceCount):
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
            gpu_stat = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            gpu_memory.append(gpu_stat.memory)
            gpu_utilization.append(gpu_stat.gpu)
    else:
        gpu_memory = []
        gpu_utilization = []

    sys_memory = process.memory_info()[0] / 2. ** 30

    return gpu_memory, gpu_utilization, sys_memory
Example No. 17
def get_gpu():
    n_gpus = gs.n_gpus()
    G, M = [], []
    for i in range(n_gpus):
        bus_id = gs.bus_id(i)
        h = nvidia_smi.nvmlDeviceGetHandleByPciBusId("0000:%d:00.0" % bus_id)

        memutil, gpuutil = [], []
        for k in range(100):
            util = nvidia_smi.nvmlDeviceGetUtilizationRates(h)
            memutil.append(util.memory)
            gpuutil.append(util.gpu)
            time.sleep(.01)
        G.append(np.mean(gpuutil))
        M.append(np.mean(memutil))
    print "GPU Utilization:", G
    print "Mem Utilization:", M
    return np.argmin(2*np.array(G) + np.array(M))
Example No. 18
    def Waypoint(self):

        elapsedTime = time.perf_counter() - self.WaypointStartTime
        self.WaypointStartTime = time.perf_counter()

        memoryUsage = psutil.virtual_memory()[2]
        cpuUsage = psutil.cpu_percent()

        self.MemoryUsage.append(memoryUsage)
        self.CpuUsage.append(cpuUsage)
        self.ElapsedTime.append(elapsedTime)

        mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.handle)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)

        self.gpu_mem.append(mem_res.used / mem_res.total)
        self.gpu_usage.append(res.gpu)

        return elapsedTime, cpuUsage, memoryUsage, mem_res.used / mem_res.total, res.gpu
Example No. 19
 def getHardwareStatus(self):
     res = {}
     try:
         c_t = int(psutil.sensors_temperatures()['i350bb'][0].current)
         #c_t = 0
         res = {'cpu':[int(psutil.cpu_percent()), int(psutil.virtual_memory().percent), c_t, len(self.camsList)]}        
         if self.isGPU:
             if self.gpuInfo:
                 for name in self.gpuInfo:
                     index = self.gpuInfo[name]
                     handle = nvidia_smi.nvmlDeviceGetHandleByIndex(index)
                     r = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
                     #temperature = 0
                     temperature = nvidia_smi.nvmlDeviceGetTemperature(handle, nvidia_smi.NVML_TEMPERATURE_GPU)
                     num = len(self.gpusActiveList[name].cams)
                     res[name] = [int(r.gpu), int(r.memory), int(temperature), num]
     except:
         print("get hardware")
         print(sys.exc_info())
     return res
Example No. 20
    def get_vals(self):

        # cmd = ['nvidia-settings', '-t', '-q', 'GPUUtilization']
        # gpu_util = subprocess.check_output(cmd).strip().decode('utf-8').split(",")
        # gpu_util = dict([f.strip().split("=") for f in gpu_util])
        # cmd[-1] = 'UsedDedicatedGPUMemory'
        # gpu_used_mem = subprocess.check_output(cmd).strip().decode('utf-8')

        nvidia_smi.nvmlInit()
        # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate
        self.gpu_handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
        util_res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.gpu_handle)
        #mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.gpu_handle)
        # current_vals = {"gpu_mem_alloc": mem_res.used / (1024**2), "gpu_graphics_util": int(gpu_util['graphics']),
        #                 "gpu_mem_util": gpu_util['memory'], "time": time.time()}
        current_vals = {
            "gpu_graphics_util": float(util_res.gpu),
            "time": time.time()
        }
        return current_vals
Example No. 21
def nvapi():
    nvmlInit()
    ret = {}
    n_gpus = int(nvmlDeviceGetCount())
    ret['n_gpus'] = n_gpus
    for i in range(n_gpus):
        gpu_str = '{}.'.format(i)
        gpu_obj = nvmlDeviceGetHandleByIndex(i)
        ret[gpu_str + 'temp'] = nvmlDeviceGetTemperature(
            gpu_obj, NVML_TEMPERATURE_GPU)
        this_ram = nvmlDeviceGetMemoryInfo(gpu_obj)
        ret[gpu_str + 'ram.used'] = this_ram.used / MB
        ret[gpu_str + 'ram.total'] = this_ram.total / MB
        ret[gpu_str +
            'power.current'] = nvmlDeviceGetPowerUsage(gpu_obj) / 1000.0
        ret[gpu_str +
            'power.limit'] = nvmlDeviceGetEnforcedPowerLimit(gpu_obj) / 1.0
        ret[gpu_str +
            'util'] = nvmlDeviceGetUtilizationRates(gpu_obj).gpu / 1.0
    nvmlShutdown()
    return ret
Example No. 22
def run(model):

    batch_time = AverageMeter()
    gpu = AverageMeter()
    gpu_mem = AverageMeter()
    input = torch.rand((args.batch_size,) + ModelData.INPUT_SHAPE)
    with torch.no_grad():
        end = time.time()
        for i in range(args.loop):
            input_cuda = input.cuda(non_blocking=True)
            model(input_cuda)
            torch.cuda.synchronize()
            batch_time.update(time.time() - end)
            end = time.time()
            # https://pypi.org/project/py3nvml/
            util_rate = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            # print(util_rate.gpu, util_rate.memory)
            gpu.update(util_rate.gpu)
            mem_info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            gpu_mem.update(mem_info.used >> 20)

            if i % args.print_freq == 0 and not args.csv:
                print('[{}/{}] batch time {batch_time.val:.3f} s (avg: {batch_time.avg:.3f})'.format(
                    i, args.loop, batch_time=batch_time))
    # print summary
    if args.csv:
        print("{}, {:.3f}, {:.3f}, {:.3f}, {}".format(
            args.batch_size,
            args.loop * args.batch_size / batch_time.sum,
            batch_time.avg,
            gpu.avg, gpu_mem.avg))
    else:
        print("batchsize: {} ".format(args.batch_size))
        print("throughput: {:.3f} img/sec".format(args.loop *
                                                  args.batch_size / batch_time.sum))
        print("Latency: {:.3f} sec".format(batch_time.avg))
        # see https://forums.fast.ai/t/show-gpu-utilization-metrics-inside-training-loop-without-subprocess-call/26594
        # show GPU utilization metrics inside the training loop
        print("GPU util: {:.3f} %, GPU mem: {} MiB".format(
            gpu.avg, gpu_mem.avg))
Example No. 23
    def train(self, epoch):
        self.model.train()
        print("Epoch {0}/{1} - {2}".format(epoch, self.n_epochs,
                                           time.strftime("%Y_%m_%d-%H_%M_%S")))
        t = tqdm(self.train_loader)

        loss_L2_avg = 0.0

        n = 1

        for param_group in self.optimizer.param_groups:
            current_lr = param_group['lr']

        for batch_idx, (data, target) in enumerate(t):
            if self.cuda:
                data, target = data.to(self.device), target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)

            loss_L2 = self.lossfn_L2(output, target)

            loss_L2_avg += (loss_L2.item() - loss_L2_avg) / n
            n += 1

            self.loss_L2.append(loss_L2_avg)

            loss_L2.backward()
            self.optimizer.step()

            if self.cuda:
                tmp = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
                t.set_postfix(loss=loss_L2_avg,
                              lr=current_lr,
                              gpu=tmp.gpu,
                              mem=tmp.memory)
            else:
                t.set_postfix(loss=loss_L2_avg, lr=current_lr)
Example No. 24
 def show_GPU(self):
     res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
     # print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
     return res.memory
Example No. 25
#!/usr/bin/env python3
#coding: utf8
#author: Tian Xia ([email protected])

import nvidia_smi

if __name__ == "__main__":
    nvidia_smi.nvmlInit()
    for gpu_id in range(8):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_id)
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
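
The loop above hardcodes eight GPUs; a variant that queries the actual device count, as Example No. 2 does, might look like:

import nvidia_smi

nvidia_smi.nvmlInit()
for gpu_id in range(nvidia_smi.nvmlDeviceGetCount()):
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_id)
    res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
    print(f'gpu{gpu_id}: {res.gpu}%, gpu-mem: {res.memory}%')
nvidia_smi.nvmlShutdown()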
Example No. 26
def resource_util(pid, interval):
    '''
        arg:
            pid: process id (int)
        
        example return:
            {
                'pid': 24832, 
                'cpu': 0.0, 
                'mem_total': 3371, 
                'mem_shared': 502, 
                'mem_data': 3039, 
                'gpu_id': 0, 
                'gpu_mem': 5985.0, 
                'gpu_usage': 100, 
                'result': [24832, 0.0, 3371, 502, 3039, 0, 5985.0, 100]
            }
    '''
    nvidia_smi.nvmlInit()
    # Get resources used by process
    p = psutil.Process(pid)
    usage = {'pid': pid}
    result = [pid]

    # cpu usage of current PID
    usage['cpu'] = p.cpu_percent(interval=interval)
    result.append(usage['cpu'])
    # Memory usage current PID
    mem = p.memory_info()
    # print(mem, type(mem))
    usage['mem_total'] = mem.rss >> 20
    result.append(usage['mem_total'])
    usage['mem_shared'] = mem.shared >> 20
    result.append(usage['mem_shared'])
    usage['mem_data'] = mem.data >> 20
    result.append(usage['mem_data'])

    for process in (nvsmi.get_gpu_processes()):
        # print(process.pid, process.gpu_id, process.used_memory)
        if process.pid == pid:
            usage['gpu_id'] = int(process.gpu_id)
            result.append(usage['gpu_id'])
            usage['gpu_mem'] = process.used_memory
            result.append(usage['gpu_mem'])
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(int(process.gpu_id))
            res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            usage['gpu_usage'] = res.gpu  # GPU utilization; may not be due solely to this process
            result.append(usage['gpu_usage'])
            break
    else:
        usage['gpu_id'] = None
        result.append(usage['gpu_id'])
        usage['gpu_mem'] = None
        result.append(usage['gpu_mem'])
        usage['gpu_usage'] = None  # GPU utilization; may not be due solely to this process
        result.append(usage['gpu_usage'])

    usage['result'] = result
    return usage
Example No. 27
    def run_scenario(self,path,path1):
        """
        Trigger the start of the scenario and wait for it to finish/fail
        """
        print("ScenarioManager: Running scenario {}".format(self.scenario_tree.name))

        self.start_system_time = time.time()
        start_game_time = GameTime.get_time()
        total_risk_score = []
        self._watchdog.start()
        self._running = True

        x = 0
        while self._running:
            timestamp = None
            world = CarlaDataProvider.get_world()
            if world:
                snapshot = world.get_snapshot()
                if snapshot:
                    timestamp = snapshot.timestamp
            if timestamp:
                self._tick_scenario(timestamp)
            x += 1
            if x % 60 == 1:
                res = nvidia_smi.nvmlDeviceGetUtilizationRates(self.handle)
                mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(self.handle)
                #print(f'mem: {mem_res.used / (1024**2)} (GiB)') # usage in GiB
                print(f'mem: {100 * (mem_res.used / mem_res.total):.3f}%') # percentage usage
                cpu = psutil.cpu_percent()  # CPU utilization stats
                mem = psutil.virtual_memory()  # virtual memory stats
                print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')

                fields = ['GPU Utilization',
                            'GPU Memory',
                            'CPU Utilization',
                            'CPU Memory'
                        ]

                rows = [{'GPU Utilization': res.gpu, 'GPU Memory': 100 * (mem_res.used / mem_res.total), 'CPU Utilization': cpu, 'CPU Memory': 100 * (mem.used / mem.total)}]


                file_exists = os.path.isfile(path1)
                with open(path1, 'a') as csvfile:
                    # creating a csv dict writer object
                    writer = csv.DictWriter(csvfile, fieldnames = fields)
                    if not file_exists:
                        writer.writeheader()
                    writer.writerows(rows)


                #total_risk_score.append(risk_score)
        #print("--------------------------------------------------------------------------")
        #print("Average Risk Score:%f"%(float(sum(total_risk_score))/len(total_risk_score)))
        #print("--------------------------------------------------------------------------")

        self._watchdog.stop()

        self.end_system_time = time.time()
        end_game_time = GameTime.get_time()

        self.scenario_duration_system = self.end_system_time - \
            self.start_system_time
        self.scenario_duration_game = end_game_time - start_game_time

        fields = ['Route Completed',
                    'Collisions'
                ]

        route_completed, collisions = self._console_message()

        rows = [{'Route Completed': route_completed, 'Collisions': collisions}]


        file_exists = os.path.isfile(path)
        with open(path, 'a') as csvfile:
            # creating a csv dict writer object
            writer = csv.DictWriter(csvfile, fieldnames = fields)
            if not file_exists:
                writer.writeheader()
            writer.writerows(rows)
Example No. 28
    def __get_gpu_info(self):
        def parse_unit(val, scale=1000):
            unit_ls = ['B', 'KB', 'MB', 'GB']
            unit_lv = 0
            while val >= scale:
                val /= scale
                unit_lv += 1
                if unit_lv == len(unit_ls) - 1:
                    break
            return '{:.2f} {}'.format(val, unit_ls[unit_lv])

        sum_info = []
        process_ls = []

        nv.nvmlInit()
        gpu_num = nv.nvmlDeviceGetCount()
        # Iterate over each GPU card
        for gpu_idx in range(gpu_num):
            h = nv.nvmlDeviceGetHandleByIndex(gpu_idx)
            dev_name = nv.nvmlDeviceGetName(h).decode()
            raw_total_mem = nv.nvmlDeviceGetMemoryInfo(h).total
            total_mem = parse_unit(raw_total_mem, 1024)
            raw_used_mem = nv.nvmlDeviceGetMemoryInfo(h).used
            used_mem = parse_unit(raw_used_mem, 1024)
            gpu_util = '{:.2f}'.format(nv.nvmlDeviceGetUtilizationRates(h).gpu)
            gpu_mem_util = '{:.2f}'.format(raw_used_mem * 100 / raw_total_mem)

            tmp = {}
            tmp['gpu_idx'] = str(gpu_idx)
            tmp['dev_name'] = dev_name
            tmp['total_mem'] = total_mem
            tmp['used_mem'] = used_mem
            tmp['gpu_util'] = gpu_util
            tmp['gpu_mem_util'] = gpu_mem_util
            sum_info.append(tmp)

            running_process_obj_ls = nv.nvmlDeviceGetComputeRunningProcesses(h)
            for obj in running_process_obj_ls:
                process_pid = obj.pid
                process_type = 'C'
                process_raw_gpu_mem = obj.usedGpuMemory
                process_name = nv.nvmlSystemGetProcessName(
                    process_pid).decode()
                ctan_name = self.get_ctan_name_by_pid(process_pid)

                tmp = {}
                tmp['gpu_idx'] = str(gpu_idx)
                tmp['dev_name'] = dev_name
                tmp['process_pid'] = str(process_pid)
                tmp['process_type'] = process_type
                tmp['process_name'] = process_name
                tmp['process_gpu_mem'] = parse_unit(process_raw_gpu_mem, 1024)
                tmp['ctan_name'] = ctan_name
                process_ls.append(tmp)

            running_process_obj_ls = nv.nvmlDeviceGetGraphicsRunningProcesses(
                h)
            for obj in running_process_obj_ls:
                process_pid = obj.pid
                process_type = 'G'
                process_raw_gpu_mem = obj.usedGpuMemory
                process_name = nv.nvmlSystemGetProcessName(
                    process_pid).decode()
                ctan_name = self.get_ctan_name_by_pid(process_pid)

                tmp = {}
                tmp['gpu_idx'] = str(gpu_idx)
                tmp['dev_name'] = dev_name
                tmp['process_pid'] = str(process_pid)
                tmp['process_type'] = process_type
                tmp['process_name'] = process_name
                tmp['process_gpu_mem'] = parse_unit(process_raw_gpu_mem, 1024)
                tmp['ctan_name'] = ctan_name
                process_ls.append(tmp)
        return sum_info, process_ls
Example No. 29
    def run(self):
        import random
        self.time_step = 0.01
        counter = 0
        print_counter = 0
        while (self.running):
            res = []
            for i in range(self.deviceCount):
                res.append(
                    nvidia_smi.nvmlDeviceGetUtilizationRates(self.GPUs[i]))

            # Print every self.print_time #
            if print_counter == int(self.print_time / self.time_step):
                # Print current #
                if self.print_current:
                    s = "\t[GPU] "
                    for i in range(self.deviceCount):
                        s += "Device %d %s : utilization : %d%%, memory : %d%%\t" % (
                            i, nvmlDeviceGetName(
                                self.GPUs[i]), res[i].gpu, res[i].memory)
                    logging.info(s)
                # Print avg #
                if self.print_time < 60:
                    logging.info(
                        "\n[GPU] Occupation over the last %d seconds" %
                        self.print_time)
                else:
                    minutes = self.print_time // 60
                    seconds = self.print_time % 60
                    logging.info(
                        "\n[GPU] Occupation over the last %d minutes, %d seconds"
                        % (minutes, seconds))

                s = "[GPU] "
                for i in range(self.deviceCount):
                    self.occAvgStep[i] /= (print_counter * self.time_step)
                    self.memAvgStep[i] /= (print_counter * self.time_step)
                    s += "Device %d %s : utilization : %d%%, memory : %d%%\t" % (
                        i, nvmlDeviceGetName(self.GPUs[i]), self.occAvgStep[i],
                        self.memAvgStep[i])
                    # Reinitialize average #
                    self.occAvgStep[i] = 0
                    self.memAvgStep[i] = 0
                logging.info(s)
                # reset printing counter #
                print_counter = 0

            # Add to total and step #
            for i in range(self.deviceCount):
                self.occAvgTot[i] += res[i].gpu * self.time_step
                self.occAvgStep[i] += res[i].gpu * self.time_step
                self.memAvgTot[i] += res[i].memory * self.time_step
                self.memAvgStep[i] += res[i].memory * self.time_step

            # Sleep and counters #
            print_counter += 1
            counter += 1
            sleep(self.time_step)

        # Print total #
        logging.info("[GPU] Average occupation over whole period")
        s = "[GPU] "
        for i in range(self.deviceCount):
            self.occAvgTot[i] /= (counter * self.time_step)
            self.memAvgTot[i] /= (counter * self.time_step)
            s += "Device %d %s : utilization : %d%%, memory : %d%%\t" % (
                i, nvmlDeviceGetName(
                    self.GPUs[i]), self.occAvgTot[i], self.memAvgTot[i])
        logging.info(s)
Example No. 30
def gpu_mem_usage():
    return nvidia_smi.nvmlDeviceGetUtilizationRates(handle).memory / 100
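
Note that the memory field returned by nvmlDeviceGetUtilizationRates is the percentage of time the memory controller was busy over the sampling period, not the fraction of VRAM in use. A sketch that reports occupancy instead, assuming the same module-level handle, could be:

def gpu_mem_fraction():
    # Fraction of device memory currently allocated (0.0 to 1.0)
    mem = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    return mem.used / mem.total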
Example No. 31
def inference():
    useGpu = True

    fileName = "run_"
    if not useGpu:
        fileName += "cpu_"

    device = torch.device(
        "cuda:0" if useGpu and torch.cuda.is_available() else "cpu")

    os.makedirs('results', exist_ok=True)
    f = open(
        "results/" + fileName + str(int(round(time.time() * 1000))) + ".txt",
        "w+")
    f.write('=== Start time: ' + str(datetime.now()) + '\n')

    p = psutil.Process(os.getpid())
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)

    model = RedNet_model.RedNet(pretrained=False)
    load_ckpt(model, None, args.last_ckpt, device)
    model.eval()
    model.to(device)

    print('Starting list image files')
    filesCount = 0

    files = glob.glob("datasets/mestrado/**/rgb/*.png", recursive=True)
    files.extend(glob.glob("datasets/mestrado/**/rgb/*.jpg", recursive=True))
    cpuTimes = [0.0, 0.0, 0.0, 0.0]

    gpuTimes = 0.0
    gpuMemTimes = 0.0
    maxNumThreads = 0
    memUsageTimes = 0

    for imagePath in files:
        print('imagePath: ' + imagePath)
        pathRgb = Path(imagePath)
        datasetName = osp.basename(str(pathRgb.parent.parent))
        # print('datasetName: ' + datasetName)
        parentDatasetDir = str(pathRgb.parent.parent)
        # print('parentDatasetDir: ' + parentDatasetDir)
        depthImageName = os.path.basename(imagePath).replace('jpg', 'png')

        image = imageio.imread(imagePath)
        depth = imageio.imread(parentDatasetDir + '/depth/' + depthImageName)

        if datasetName == "active_vision" or datasetName == "putkk":
            image = image[0:1080, 240:1680]
            depth = depth[0:1080, 240:1680]
        elif datasetName == "semantics3d_mod":
            image = image[270:1080, 0:1080]
            depth = depth[270:1080, 0:1080]
        elif datasetName == "semantics3d_raw":
            image = image[64:1024, 0:1280]
            depth = depth[64:1024, 0:1280]

        # Bi-linear
        image = skimage.transform.resize(image, (image_h, image_w),
                                         order=1,
                                         mode='reflect',
                                         preserve_range=True)
        # Nearest-neighbor
        depth = skimage.transform.resize(depth, (image_h, image_w),
                                         order=0,
                                         mode='reflect',
                                         preserve_range=True)

        image = image / 255
        image = torch.from_numpy(image).float()
        depth = torch.from_numpy(depth).float()
        image = image.permute(2, 0, 1)
        depth.unsqueeze_(0)

        image = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224,
                                                      0.225])(image)
        depth = torchvision.transforms.Normalize(mean=[19050],
                                                 std=[9650])(depth)

        image = image.to(device).unsqueeze_(0)
        depth = depth.to(device).unsqueeze_(0)

        pred = model(image, depth)

        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        mem_res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        curGpuTime = res.gpu
        #curGpuMemTime = res.memory #(in percent)
        curGpuMemTime = mem_res.used / 1e+6
        gpuTimes += curGpuTime
        gpuMemTimes += curGpuMemTime
        f.write('GPU Usage Percent: ' + str(curGpuTime) + '\n')
        f.write('GPU Mem Usage (MB): ' + str(curGpuMemTime) + '\n')

        curProcessCpuPerU = p.cpu_percent()
        curCpusPerU = psutil.cpu_percent(interval=None, percpu=True)

        # gives a single float value
        for i in range(len(cpuTimes)):
            curProcessCpu = curProcessCpuPerU
            curCpu = curCpusPerU[i]
            cpuTimes[i] += curCpu
            f.write('Process CPU Percent: ' + str(curProcessCpu) +
                    ' --- CPU Percent: ' + str(curCpu) + '\n')

        # you can convert that object to a dictionary
        memInfo = dict(p.memory_full_info()._asdict())
        curMemUsage = memInfo['uss']
        memUsageTimes += curMemUsage

        f.write('Process memory usage: ' + str(curMemUsage / 1e+6) + '\n')
        f.write('Memory information: ' + str(memInfo) + '\n')

        if maxNumThreads < p.num_threads():
            maxNumThreads = p.num_threads()

        # print('############## Index: ')
        # print(index)
        os.makedirs('results/' + datasetName, exist_ok=True)

        output = utils.to_label(torch.max(pred, 1)[1] + 1)
        #output = utils.to_label(torch.max(pred, 1)[1] + 1)[0]
        #imageio.imsave('results/' + datasetName + '/' + depthImageName, output.cpu().numpy().transpose((1, 2, 0)))
        #imageio.imsave('results/' + datasetName + '/' + depthImageName, output)
        lbl_pil = PIL.Image.fromarray(output.astype(np.uint8), mode='P')
        lbl_pil.save('results/' + datasetName + '/' + depthImageName)
        filesCount = filesCount + 1

        del image, depth, pred, output

        torch.cuda.empty_cache()
    nvidia_smi.nvmlShutdown()

    start = time.time()
    for imagePath in files:
        pathRgb = Path(imagePath)
        datasetName = osp.basename(str(pathRgb.parent.parent))
        parentDatasetDir = str(pathRgb.parent.parent)
        depthImageName = os.path.basename(imagePath).replace('jpg', 'png')

        image = imageio.imread(imagePath)
        depth = imageio.imread(parentDatasetDir + '/depth/' + depthImageName)

        if datasetName == "active_vision" or datasetName == "putkk":
            image = image[0:1080, 240:1680]
            depth = depth[0:1080, 240:1680]
        elif datasetName == "semantics3d_mod":
            image = image[270:1080, 0:1080]
            depth = depth[270:1080, 0:1080]
        elif datasetName == "semantics3d_raw":
            image = image[64:1024, 0:1280]
            depth = depth[64:1024, 0:1280]

        # Bi-linear
        image = skimage.transform.resize(image, (image_h, image_w),
                                         order=1,
                                         mode='reflect',
                                         preserve_range=True)
        # Nearest-neighbor
        depth = skimage.transform.resize(depth, (image_h, image_w),
                                         order=0,
                                         mode='reflect',
                                         preserve_range=True)

        image = image / 255
        image = torch.from_numpy(image).float()
        depth = torch.from_numpy(depth).float()
        image = image.permute(2, 0, 1)
        depth.unsqueeze_(0)

        image = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224,
                                                      0.225])(image)
        depth = torchvision.transforms.Normalize(mean=[19050],
                                                 std=[9650])(depth)

        image = image.to(device).unsqueeze_(0)
        depth = depth.to(device).unsqueeze_(0)

        pred = model(image, depth)

        del image, depth, pred

        #torch.cuda.empty_cache()
    end = time.time()

    f.write('=== Mean GPU Usage Percent: ' + str(gpuTimes / filesCount) + '\n')
    f.write('=== Mean GPU Mem Usage (MB): ' + str(gpuMemTimes / filesCount) +
            '\n')
    for i in range(len(cpuTimes)):
        f.write("=== Mean cpu" + str(i) + " usage: " +
                str(cpuTimes[i] / filesCount) + '\n')
    f.write("=== Mean memory usage (MB): " +
            str((memUsageTimes / filesCount) / 1e+6) + '\n')

    f.write("=== Total image predicted: " + str(filesCount) + '\n')
    f.write("=== Seconds per image: " + str(((end - start) / filesCount)) +
            '\n')
    f.write("=== Max num threads: " + str(maxNumThreads) + '\n')

    f.write('=== End time: ' + str(datetime.now()) + '\n')
    f.close()