Esempi in Python per max_memory_allocated, esempi in Python per torch.cuda.max_memory_allocated

Esempio n. 1

0

Mostra file

File: conftest.py Progetto: fwilliams/falkon

def memory_checker(opt: FalkonOptions, extra_mem=0):
    is_cpu = opt.use_cpu
    mem_check = False
    if (is_cpu and opt.max_cpu_mem < np.inf) or (not is_cpu
                                                 and opt.max_gpu_mem < np.inf):
        mem_check = True

    start_ram = None
    if mem_check and not is_cpu:
        devices = list(range(torch.cuda.device_count()))
        start_ram = {}
        for dev in devices:
            tcd.reset_peak_memory_stats(dev)
            # We have to work around buggy memory stats: sometimes reset doesn't work as expected.
            start_ram[dev] = torch.cuda.max_memory_allocated(dev)
    elif mem_check:
        start_ram = _cpu_used_mem(uss=True)
        opt = dataclasses.replace(opt, max_cpu_mem=opt.max_cpu_mem + start_ram)

    yield opt

    # Check memory usage
    if mem_check and not is_cpu:
        devices = list(range(torch.cuda.device_count()))
        for dev in devices:
            used_ram = tcd.max_memory_allocated(
                dev) - start_ram[dev] - extra_mem
            assert used_ram <= opt.max_gpu_mem, \
                "DEV %d - Memory usage (%.2fMB) exceeds allowed usage (%.2fMB)" % \
                (dev, used_ram / 2 ** 20, opt.max_gpu_mem / 2 ** 20)
    elif mem_check:
        used_ram = _cpu_used_mem(uss=True) - start_ram - extra_mem
        assert used_ram <= opt.max_cpu_mem, \
            "Memory usage (%.2fMB) exceeds allowed usage (%.2fMB)" % \
            (used_ram / 2 ** 20, opt.max_cpu_mem / 2 ** 20)

Esempio n. 2

0

Mostra file

 def get_gpu_statistics(self):
     id = cuda.current_device()
     print("Max memory allocated on GPU %d: %d bytes" %
           (id, cuda.max_memory_allocated(id)))
     print("Max memory cached on GPU %d: %d bytes" %
           (id, cuda.max_memory_cached(id)))
     print("Current memory allocated on GPU %d: %d bytes" %
           (id, cuda.memory_allocated(id)))
     print("Current memory cached on GPU %d: %d bytes" %
           (id, cuda.memory_cached(id)))

Esempio n. 3

0

Mostra file

def get_memory_cost(fun, *samples):
    # warm up
    # fun(*samples)
    benchmark = torch.backends.cudnn.benchmark
    torch.backends.cudnn.benchmark = False  # conservatively estimate to avoid out of memory in the first calling

    for i in range(GPU_NUM):
        cuda.reset_peak_memory_stats(i)

    max_used_memory_pre = sum(
        [cuda.max_memory_allocated(i) for i in range(GPU_NUM)])
    fun(*samples)
    max_used_memory_post = sum(
        [cuda.max_memory_allocated(i) for i in range(GPU_NUM)])

    memory_cost = max_used_memory_post - max_used_memory_pre

    torch.backends.cudnn.benchmark = benchmark

    return max(memory_cost, 1)

Esempio n. 4

0

Mostra file

File: main.py Progetto: SudarshiniTyagi/cloud_ml_project1

def train(train_loader, model, criterion, optimizer, epoch, args):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 500 == 0:
            progress.display(i)
    # gpu memory usage
    # gpu memory usage
    from torch.cuda import max_memory_allocated, reset_max_memory_allocated
    print("max gpu memory used = {0}".format(max_memory_allocated()))
    reset_max_memory_allocated()

Esempio n. 5

0

Mostra file

File: detecter.py Progetto: manmanCover/FADNet

def detect(opt):
    model = opt.model
    result_path = opt.rp
    file_list = opt.filelist
    filepath = opt.filepath
    
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    devices = [int(item) for item in opt.devices.split(',')]
    ngpu = len(devices)
    #net = DispNetC(ngpu, True)
    #net = DispNetCSRes(ngpu, False, True)
    #net = DispNetCSResWithMono(ngpu, False, True, input_channel=3)

    if opt.net == "psmnet" or opt.net == "ganet":
        net = build_net(opt.net)(maxdisp=192)
    elif opt.net == "dispnetc":
        net = build_net(opt.net)(batchNorm=False, lastRelu=True, resBlock=False)
    else:
        net = build_net(opt.net)(batchNorm=False, lastRelu=True)
 
    net = torch.nn.DataParallel(net, device_ids=devices).cuda()

    model_data = torch.load(model)
    print(model_data.keys())
    if 'state_dict' in model_data.keys():
        net.load_state_dict(model_data['state_dict'])
    else:
        net.load_state_dict(model_data)

    num_of_parameters = count_parameters(net)
    print('Model: %s, # of parameters: %d' % (opt.net, num_of_parameters))

    net.eval()

    batch_size = int(opt.batchSize)
    test_dataset = DispDataset(txt_file=file_list, root_dir=filepath, phase='detect')
    test_loader = DataLoader(test_dataset, batch_size = batch_size, \
                        shuffle = False, num_workers = 1, \
                        pin_memory = True)

    s = time.time()
    #high_res_EPE = multiscaleloss(scales=1, downscale=1, weights=(1), loss='L1', sparse=False)

    avg_time = []
    display = 100
    warmup = 10
    for i, sample_batched in enumerate(test_loader):
        input = torch.cat((sample_batched['img_left'], sample_batched['img_right']), 1)
        # print('input Shape: {}'.format(input.size()))
        num_of_samples = input.size(0)
        target = sample_batched['gt_disp']

        #print('disp Shape: {}'.format(target.size()))
        #original_size = (1, target.size()[2], target.size()[3])

        target = target.cuda()
        input = input.cuda()
        input_var = torch.autograd.Variable(input, volatile=True)
        target_var = torch.autograd.Variable(target, volatile=True)

        if i > warmup:
            ss = time.time()
        if opt.net == "psmnet" or opt.net == "ganet":
            output = net(input_var)
        elif opt.net == "dispnetc":
            output = net(input_var)[0]
        else:
            output = net(input_var)[-1] 
 
        if i > warmup:
            avg_time.append((time.time() - ss))
            if (i - warmup) % display == 0:
                print('Average inference time: %f' % np.mean(avg_time))
                mbytes = 1024.*1024
                print('GPU memory usage memory_allocated: %d MBytes, max_memory_allocated: %d MBytes, memory_cached: %d MBytes, max_memory_cached: %d MBytes, CPU memory usage: %d MBytes' %  \
                    (ct.memory_allocated()/mbytes, ct.max_memory_allocated()/mbytes, ct.memory_cached()/mbytes, ct.max_memory_cached()/mbytes, process.memory_info().rss/mbytes))
                avg_time = []

        # output = net(input_var)[1]
        output[output > 192] = 0
        output = scale_disp(output, (output.size()[0], 540, 960))
        for j in range(num_of_samples):
            # scale back depth
            np_depth = output[j][0].data.cpu().numpy()
            gt_depth = target_var[j, 0, :, :].data.cpu().numpy()
            #print(np.min(np_depth), np.max(np_depth))
            #cuda_depth = torch.from_numpy(np_depth).cuda()
            #cuda_depth = torch.autograd.Variable(cuda_depth, volatile=True)

            # flow2_EPE = high_res_EPE(output[j], target_var[j]) * 1.0
            #flow2_EPE = high_res_EPE(cuda_depth, target_var[j]) * 1.0
            #print('Shape: {}'.format(output[j].size()))
            print('Batch[{}]: {}, average disp: {}'.format(i, j, np.mean(np_depth)))
            #print('Batch[{}]: {}, Flow2_EPE: {}'.format(i, sample_batched['img_names'][0][j], flow2_EPE.data.cpu().numpy()))

            name_items = sample_batched['img_names'][0][j].split('/')
            #save_name = '_'.join(name_items).replace('.png', '.pfm')# for girl02 dataset
            #save_name = 'predict_{}_{}_{}.pfm'.format(name_items[-4], name_items[-3], name_items[-1].split('.')[0])
            #save_name = 'predict_{}_{}.pfm'.format(name_items[-1].split('.')[0], name_items[-1].split('.')[1])
            #save_name = 'predict_{}.pfm'.format(name_items[-1])
            #img = np.flip(np_depth[0], axis=0)

            save_name = '_'.join(name_items)# for girl02 dataset
            img = np_depth
            print('Name: {}'.format(save_name))
            print('')
            #save_pfm('{}/{}'.format(result_path, save_name), img)
            skimage.io.imsave(os.path.join(result_path, save_name),(img*256).astype('uint16'))
            
            save_name = '_'.join(name_items).replace(".png", "_gt.png")# for girl02 dataset
            img = gt_depth
            print('Name: {}'.format(save_name))
            print('')
            #save_pfm('{}/{}'.format(result_path, save_name), img)
            skimage.io.imsave(os.path.join(result_path, save_name),(img*256).astype('uint16'))


    print('Evaluation time used: {}'.format(time.time()-s))

Esempio n. 6

0

Mostra file

def main(config):
    # For fast training.
    cudnn.benchmark = True

    # Create directories if not exist.
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)
    if not os.path.exists(config.model_save_dir):
        os.makedirs(config.model_save_dir)
    if not os.path.exists(config.sample_dir):
        os.makedirs(config.sample_dir)
    if not os.path.exists(config.result_dir):
        os.makedirs(config.result_dir)

    # Data loader.
    celeba_loader = None
    rafd_loader = None

    if config.dataset in ['CelebA', 'Both']:
        if config.CelebA_data_loader_load_dir == "":
            celeba_loader = get_loader(config.celeba_image_dir,
                                       config.attr_path, config.selected_attrs,
                                       config.celeba_crop_size,
                                       config.image_size, config.batch_size,
                                       'CelebA', config.mode,
                                       config.num_workers)
        else:
            with open(config.CelebA_data_loader_load_dir, "rb") as f:
                celeba_loader = pickle.load(f)

    if config.dataset in ['RaFD', 'Both']:
        if config.RaFD_data_loader_load_dir == "":
            rafd_loader = get_loader(config.rafd_image_dir, None, None,
                                     config.rafd_crop_size, config.image_size,
                                     config.batch_size, 'RaFD', config.mode,
                                     config.num_workers)
        else:
            with open(config.RaFD_data_loader_load_dir, "rb") as f:
                rafd_loader = pickle.load(f)

    # Solver for training and testing StarGAN.
    solver = Solver(celeba_loader, rafd_loader, config)

    if config.CelebA_data_loader_save_dir != "":
        with open(config.CelebA_data_loader_save_dir, "wb") as f:
            pickle.dump(celeba_loader, f)

    if config.RaFD_data_loader_save_dir != "":
        with open(config.RaFD_data_loader_save_dir, "wb") as f:
            pickle.dump(rafd_loader, f)

    print("mem-reserved:", max_memory_reserved())
    print("mem-allocated:", max_memory_allocated())
    if config.mode == 'train':
        print("mem-reserved:", max_memory_reserved())
        print("mem-allocated:", max_memory_allocated())
        if config.dataset in ['CelebA', 'RaFD']:
            solver.train()
        elif config.dataset in ['Both']:
            solver.train_multi()
        print("mem-reserved:", max_memory_reserved())
        print("mem-allocated:", max_memory_allocated())
    elif config.mode == 'test':
        if config.dataset in ['CelebA', 'RaFD']:
            solver.test()
        elif config.dataset in ['Both']:
            solver.test_multi()

Esempio n. 7

0

Mostra file

def detect(opt):

    net_name = opt.net
    model = opt.model
    result_path = opt.rp
    file_list = opt.filelist
    filepath = opt.filepath

    if not os.path.exists(result_path):
        os.makedirs(result_path)

    devices = [int(item) for item in opt.devices.split(',')]
    ngpu = len(devices)

    # build net according to the net name
    if net_name == "psmnet" or net_name == "ganet":
        net = build_net(net_name)(192)
    elif net_name in ["fadnet", "dispnetc"]:
        net = build_net(net_name)(batchNorm=False, lastRelu=True)

    net = torch.nn.DataParallel(net, device_ids=devices).cuda()

    model_data = torch.load(model)
    print(model_data.keys())
    if 'state_dict' in model_data.keys():
        net.load_state_dict(model_data['state_dict'])
    else:
        net.load_state_dict(model_data)

    num_of_parameters = count_parameters(net)
    print('Model: %s, # of parameters: %d' % (net_name, num_of_parameters))

    net.eval()

    batch_size = int(opt.batchSize)
    test_dataset = StereoDataset(txt_file=file_list,
                                 root_dir=filepath,
                                 phase='detect')
    test_loader = DataLoader(test_dataset, batch_size = batch_size, \
                        shuffle = False, num_workers = 1, \
                        pin_memory = True)

    s = time.time()

    avg_time = []
    display = 50
    warmup = 10
    for i, sample_batched in enumerate(test_loader):
        #if i > 215:
        #    break

        input = torch.cat(
            (sample_batched['img_left'], sample_batched['img_right']), 1)

        # print('input Shape: {}'.format(input.size()))
        num_of_samples = input.size(0)

        #output, input_var = detect_batch(net, sample_batched, opt.net, (540, 960))

        input = input.cuda()
        input_var = torch.autograd.Variable(input, volatile=True)

        if i > warmup:
            ss = time.time()

        with torch.no_grad():
            if opt.net == "psmnet" or opt.net == "ganet":
                output = net(input_var)
                output = output.unsqueeze(1)
            elif opt.net == "dispnetc":
                output = net(input_var)[0]
            else:
                output = net(input_var)[-1]

        if i > warmup:
            avg_time.append((time.time() - ss))
            if (i - warmup) % display == 0:
                print('Average inference time: %f' % np.mean(avg_time))
                mbytes = 1024. * 1024
                print('GPU memory usage memory_allocated: %d MBytes, max_memory_allocated: %d MBytes, memory_cached: %d MBytes, max_memory_cached: %d MBytes, CPU memory usage: %d MBytes' %  \
                    (ct.memory_allocated()/mbytes, ct.max_memory_allocated()/mbytes, ct.memory_cached()/mbytes, ct.max_memory_cached()/mbytes, process.memory_info().rss/mbytes))
                avg_time = []

        output = scale_disp(output, (output.size()[0], 540, 960))
        disp = output[:, 0, :, :]

        for j in range(num_of_samples):

            name_items = sample_batched['img_names'][0][j].split('/')
            # write disparity to file
            output_disp = disp[j]
            np_disp = disp[j].data.cpu().numpy()

            print('Batch[{}]: {}, average disp: {}({}-{}).'.format(
                i, j, np.mean(np_disp), np.min(np_disp), np.max(np_disp)))
            save_name = '_'.join(name_items).replace(
                ".png", "_d.png")  # for girl02 dataset
            print('Name: {}'.format(save_name))

            skimage.io.imsave(os.path.join(result_path, save_name),
                              (np_disp * 256).astype('uint16'))

            #save_name = '_'.join(name_items).replace("png", "pfm")# for girl02 dataset
            #print('Name: {}'.format(save_name))
            #np_disp = np.flip(np_disp, axis=0)
            #save_pfm('{}/{}'.format(result_path, save_name), np_disp)

    print('Evaluation time used: {}'.format(time.time() - s))

Esempio n. 8

0

Mostra file

def get_memory_use():
    device = cuda.current_device()
    message = cuda.get_device_name(device) + ':\n'
    message += 'allocated:' + str(cuda.memory_allocated(device)) + '/' + str(cuda.max_memory_allocated()) + '\n'
    message += 'cached:' + str(cuda.memory_cached(device)) + '/' + str(cuda.max_memory_cached()) + '\n'
    return message

Esempio n. 9

0

Mostra file

File: cifar10.py Progetto: klensink/HyperNet

                        '   Batch   Acc      Loss     Prec         EPS Fwd   EPS Back      Alloc     Cached     %ModelAlloc   %ModelCached'
                    )
                else:

                    # Calc precision of recovery
                    if not autograd:
                        d = (Y - images).norm() / images.norm()
                    else:
                        d = torch.tensor([0])

                    print(
                        '  %6d,  %6.4f,  %6.4f,  %6.4e,  %6.1f,   %6.1f,       %6.3f,   %6.3f,   %6.3f,       %6.3f'
                        %
                        (i, np.mean(acc), loss.item(), d.item(),
                         np.mean(eps_fwd), np.mean(eps_back),
                         byte2mb(cuda.max_memory_allocated()),
                         byte2mb(cuda.max_memory_cached()), model_size(net) /
                         byte2mb(cuda.max_memory_allocated()),
                         model_size(net) / byte2mb(cuda.max_memory_cached())))
                    acc = []
                    start_time = time.time()
                    eps_back = []
                    eps_fwd = []

                    # cuda.reset_max_memory_allocated()
                    # cuda.reset_max_memory_cached()

        # Val set
        acc = []
        with torch.no_grad():
            for i, (images, labels) in enumerate(val_loader):

Esempio n. 10

0

Mostra file

def detect(opt):

    net_name = opt.net
    model = opt.model
    result_path = opt.rp
    file_list = opt.filelist
    filepath = opt.filepath

    if not os.path.exists(result_path):
        os.makedirs(result_path)

    devices = [int(item) for item in opt.devices.split(',')]
    ngpu = len(devices)

    # build net according to the net name
    if net_name in ["dispnetcres", "dispnetc"]:
        net = build_net(net_name)(batchNorm=False, lastRelu=True)
    else:
        net = build_net(net_name)(batchNorm=False, lastRelu=True)
        net.set_focal_length(1050.0, 1050.0)
    net = torch.nn.DataParallel(net, device_ids=devices).cuda()
    #net.cuda()

    model_data = torch.load(model)
    print(model_data.keys())
    if 'state_dict' in model_data.keys():
        net.load_state_dict(model_data['state_dict'])
    else:
        net.load_state_dict(model_data)

    num_of_parameters = count_parameters(net)
    print('Model: %s, # of parameters: %d' % (net_name, num_of_parameters))

    net.eval()

    batch_size = int(opt.batchSize)
    #test_dataset = StereoDataset(txt_file=file_list, root_dir=filepath, phase='detect')
    test_dataset = SceneFlowDataset(txt_file=file_list,
                                    root_dir=filepath,
                                    phase='detect')
    test_loader = DataLoader(test_dataset, batch_size = batch_size, \
                        shuffle = False, num_workers = 1, \
                        pin_memory = True)

    s = time.time()
    #high_res_EPE = multiscaleloss(scales=1, downscale=1, weights=(1), loss='L1', sparse=False)

    avg_time = []
    display = 100
    warmup = 10
    for i, sample_batched in enumerate(test_loader):
        input = torch.cat(
            (sample_batched['img_left'], sample_batched['img_right']), 1)
        if opt.disp_on:
            target_disp = sample_batched['gt_disp']
            target_disp = target_disp.cuda()
        if opt.norm_on:
            target_norm = sample_batched['gt_norm']
            target_norm = target_norm.cuda()

        # print('input Shape: {}'.format(input.size()))
        num_of_samples = input.size(0)

        #output, input_var = detect_batch(net, sample_batched, opt.net, (540, 960))

        input = input.cuda()
        input_var = torch.autograd.Variable(input, volatile=True)

        if i > warmup:
            ss = time.time()
        if opt.net == "psmnet" or opt.net == "ganet":
            output = net(input_var)
        elif opt.net == "dispnetc":
            output = net(input_var)[0]
        elif opt.net in ["dispnormnet", "dtonnet", "dnfusionnet"]:
            output = net(input_var)
            disp = output[0]
            normal = output[1]
            output = torch.cat((normal, disp), 1)
        else:
            output = net(input_var)[-1]

        if i > warmup:
            avg_time.append((time.time() - ss))
            if (i - warmup) % display == 0:
                print('Average inference time: %f' % np.mean(avg_time))
                mbytes = 1024. * 1024
                print('GPU memory usage memory_allocated: %d MBytes, max_memory_allocated: %d MBytes, memory_cached: %d MBytes, max_memory_cached: %d MBytes, CPU memory usage: %d MBytes' %  \
                    (ct.memory_allocated()/mbytes, ct.max_memory_allocated()/mbytes, ct.memory_cached()/mbytes, ct.max_memory_cached()/mbytes, process.memory_info().rss/mbytes))
                avg_time = []

        # output = net(input_var)[1]
        if opt.disp_on and not opt.norm_on:
            output = scale_disp(output, (output.size()[0], 540, 960))
            disp = output[:, 0, :, :]
        elif opt.disp_on and opt.norm_on:
            output = scale_norm(output, (output.size()[0], 4, 540, 960))
            disp = output[:, 3, :, :]
            normal = output[:, :3, :, :]
        print('disp shape:', disp.shape)

        for j in range(num_of_samples):

            name_items = sample_batched['img_names'][0][j].split('/')
            # write disparity to file
            if opt.disp_on:
                output_disp = disp[j]
                _target_disp = target_disp[j, 0]
                target_valid = _target_disp < 192
                print('target size', _target_disp.size())
                print('output size', output_disp.size())
                epe = F.smooth_l1_loss(output_disp[target_valid],
                                       _target_disp[target_valid],
                                       size_average=True)
                print('EPE: {}'.format(epe))

                np_disp = disp[j].data.cpu().numpy()

                print('Batch[{}]: {}, average disp: {}({}-{}).'.format(
                    i, j, np.mean(np_disp), np.min(np_disp), np.max(np_disp)))
                save_name = '_'.join(name_items).replace(".png", "_d.png")
                print('Name: {}'.format(save_name))

                skimage.io.imsave(os.path.join(result_path, save_name),
                                  (np_disp * 256).astype('uint16'))
            #save_name = '_'.join(name_items).replace(".png", "_d.pfm")
            #print('Name: {}'.format(save_name))
            #np_disp = np.flip(np_disp, axis=0)
            #save_pfm('{}/{}'.format(result_path, save_name), np_disp)

            if opt.norm_on:
                normal[j] = (normal[j] + 1.0) * 0.5
                #np_normal = normal[j].data.cpu().numpy().transpose([1, 2, 0])
                np_normal = normal[j].data.cpu().numpy()
                #save_name = '_'.join(name_items).replace('.png', '_n.png')
                save_name = '_'.join(name_items).replace('.png', '_n.exr')
                print('Name: {}'.format(save_name))
                #skimage.io.imsave(os.path.join(result_path, save_name),(normal*256).astype('uint16'))
                #save_pfm('{}/{}'.format(result_path, save_name), img)
                save_exr(np_normal, '{}/{}'.format(result_path, save_name))

            print('')

            #save_name = '_'.join(name_items).replace(".png", "_left.png")
            #img = input_var[0].detach().cpu().numpy()[:3,:,:]
            #img = np.transpose(img, (1, 2, 0))
            #print('Name: {}'.format(save_name))
            #print('')
            ##save_pfm('{}/{}'.format(result_path, save_name), img)
            #skimage.io.imsave(os.path.join(result_path, save_name),img)

    print('Evaluation time used: {}'.format(time.time() - s))

Esempio n. 11

0

Mostra file

    def train_multi(self):
        print("mem-reserved:",max_memory_reserved())
        print("mem-allocated:",max_memory_allocated())
        """Train StarGAN with multiple datasets."""        
        # Data iterators.
        celeba_iter = iter(self.celeba_loader)
        rafd_iter = iter(self.rafd_loader)

        # Fetch fixed inputs for debugging.
        x_fixed, c_org = next(celeba_iter)
        x_fixed = x_fixed.to(self.device)
        c_celeba_list = self.create_labels(c_org, self.c_dim, 'CelebA', self.selected_attrs)
        c_rafd_list = self.create_labels(c_org, self.c2_dim, 'RaFD')
        zero_celeba = torch.zeros(x_fixed.size(0), self.c_dim).to(self.device)           # Zero vector for CelebA.
        zero_rafd = torch.zeros(x_fixed.size(0), self.c2_dim).to(self.device)             # Zero vector for RaFD.
        mask_celeba = self.label2onehot(torch.zeros(x_fixed.size(0)), 2).to(self.device)  # Mask vector: [1, 0].
        mask_rafd = self.label2onehot(torch.ones(x_fixed.size(0)), 2).to(self.device)     # Mask vector: [0, 1].

        # Learning rate cache for decaying.
        g_lr = self.g_lr
        d_lr = self.d_lr

        # Start training from scratch or resume training.
        start_iters = 0
        if self.resume_iters:
            start_iters = self.resume_iters
            self.restore_model(self.resume_iters)

        # Start training.
        print('Start training...')
        start_time = time.time()
        for i in range(start_iters, self.num_iters):
            for dataset in ['CelebA', 'RaFD']:

                # =================================================================================== #
                #                             1. Preprocess input data                                #
                # =================================================================================== #
                
                # Fetch real images and labels.
                data_iter = celeba_iter if dataset == 'CelebA' else rafd_iter
                
                try:
                    x_real, label_org = next(data_iter)
                except:
                    if dataset == 'CelebA':
                        celeba_iter = iter(self.celeba_loader)
                        x_real, label_org = next(celeba_iter)
                    elif dataset == 'RaFD':
                        rafd_iter = iter(self.rafd_loader)
                        x_real, label_org = next(rafd_iter)

                # Generate target domain labels randomly.
                rand_idx = torch.randperm(label_org.size(0))
                label_trg = label_org[rand_idx]

                if dataset == 'CelebA':
                    c_org = label_org.clone()
                    c_trg = label_trg.clone()
                    zero = torch.zeros(x_real.size(0), self.c2_dim)
                    mask = self.label2onehot(torch.zeros(x_real.size(0)), 2)
                    c_org = torch.cat([c_org, zero, mask], dim=1)
                    c_trg = torch.cat([c_trg, zero, mask], dim=1)
                elif dataset == 'RaFD':
                    c_org = self.label2onehot(label_org, self.c2_dim)
                    c_trg = self.label2onehot(label_trg, self.c2_dim)
                    zero = torch.zeros(x_real.size(0), self.c_dim)
                    mask = self.label2onehot(torch.ones(x_real.size(0)), 2)
                    c_org = torch.cat([zero, c_org, mask], dim=1)
                    c_trg = torch.cat([zero, c_trg, mask], dim=1)

                x_real = x_real.to(self.device)             # Input images.
                c_org = c_org.to(self.device)               # Original domain labels.
                c_trg = c_trg.to(self.device)               # Target domain labels.
                label_org = label_org.to(self.device)       # Labels for computing classification loss.
                label_trg = label_trg.to(self.device)       # Labels for computing classification loss.

                # =================================================================================== #
                #                             2. Train the discriminator                              #
                # =================================================================================== #

                # Compute loss with real images.
                out_src, out_cls = self.D(x_real)
                out_cls = out_cls[:, :self.c_dim] if dataset == 'CelebA' else out_cls[:, self.c_dim:]
                d_loss_real = - torch.mean(out_src)
                d_loss_cls = self.classification_loss(out_cls, label_org, dataset)

                # Compute loss with fake images.
                x_fake = self.G(x_real, c_trg)
                out_src, _ = self.D(x_fake.detach())
                d_loss_fake = torch.mean(out_src)

                # Compute loss for gradient penalty.
                alpha = torch.rand(x_real.size(0), 1, 1, 1).to(self.device)
                x_hat = (alpha * x_real.data + (1 - alpha) * x_fake.data).requires_grad_(True)
                out_src, _ = self.D(x_hat)
                d_loss_gp = self.gradient_penalty(out_src, x_hat)

                # Backward and optimize.
                d_loss = d_loss_real + d_loss_fake + self.lambda_cls * d_loss_cls + self.lambda_gp * d_loss_gp
                self.reset_grad()
                d_loss.backward()
                self.d_optimizer.step()

                # Logging.
                loss = {}
                loss['D/loss_real'] = d_loss_real.item()
                loss['D/loss_fake'] = d_loss_fake.item()
                loss['D/loss_cls'] = d_loss_cls.item()
                loss['D/loss_gp'] = d_loss_gp.item()
            
                # =================================================================================== #
                #                               3. Train the generator                                #
                # =================================================================================== #

                if (i+1) % self.n_critic == 0:
                    # Original-to-target domain.
                    x_fake = self.G(x_real, c_trg)
                    out_src, out_cls = self.D(x_fake)
                    out_cls = out_cls[:, :self.c_dim] if dataset == 'CelebA' else out_cls[:, self.c_dim:]
                    g_loss_fake = - torch.mean(out_src)
                    g_loss_cls = self.classification_loss(out_cls, label_trg, dataset)

                    # Target-to-original domain.
                    x_reconst = self.G(x_fake, c_org)
                    g_loss_rec = torch.mean(torch.abs(x_real - x_reconst))

                    # Backward and optimize.
                    g_loss = g_loss_fake + self.lambda_rec * g_loss_rec + self.lambda_cls * g_loss_cls
                    self.reset_grad()
                    g_loss.backward()
                    self.g_optimizer.step()

                    # Logging.
                    loss['G/loss_fake'] = g_loss_fake.item()
                    loss['G/loss_rec'] = g_loss_rec.item()
                    loss['G/loss_cls'] = g_loss_cls.item()

                # =================================================================================== #
                #                                 4. Miscellaneous                                    #
                # =================================================================================== #

                # Print out training info.
                if (i+1) % self.log_step == 0:
                    et = time.time() - start_time
                    et = str(datetime.timedelta(seconds=et))[:-7]
                    log = "Elapsed [{}], Iteration [{}/{}], Dataset [{}]".format(et, i+1, self.num_iters, dataset)
                    for tag, value in loss.items():
                        log += ", {}: {:.4f}".format(tag, value)
                    print(log)

                    if self.use_tensorboard:
                        for tag, value in loss.items():
                            self.logger.scalar_summary(tag, value, i+1)

            # Translate fixed images for debugging.
            if (i+1) % self.sample_step == 0:
                with torch.no_grad():
                    x_fake_list = [x_fixed]
                    for c_fixed in c_celeba_list:
                        c_trg = torch.cat([c_fixed, zero_rafd, mask_celeba], dim=1)
                        x_fake_list.append(self.G(x_fixed, c_trg))
                    for c_fixed in c_rafd_list:
                        c_trg = torch.cat([zero_celeba, c_fixed, mask_rafd], dim=1)
                        x_fake_list.append(self.G(x_fixed, c_trg))
                    x_concat = torch.cat(x_fake_list, dim=3)
                    sample_path = os.path.join(self.sample_dir, '{}-images.jpg'.format(i+1))
                    save_image(self.denorm(x_concat.data.cpu()), sample_path, nrow=1, padding=0)
                    print('Saved real and fake images into {}...'.format(sample_path))

            # Save model checkpoints.
            if (i+1) % self.model_save_step == 0:
                G_path = os.path.join(self.model_save_dir, '{}-G.ckpt'.format(i+1))
                D_path = os.path.join(self.model_save_dir, '{}-D.ckpt'.format(i+1))
                torch.save(self.G.state_dict(), G_path)
                torch.save(self.D.state_dict(), D_path)
                print('Saved model checkpoints into {}...'.format(self.model_save_dir))

            # Decay learning rates.
            if (i+1) % self.lr_update_step == 0 and (i+1) > (self.num_iters - self.num_iters_decay):
                g_lr -= (self.g_lr / float(self.num_iters_decay))
                d_lr -= (self.d_lr / float(self.num_iters_decay))
                self.update_lr(g_lr, d_lr)
                print ('Decayed learning rates, g_lr: {}, d_lr: {}.'.format(g_lr, d_lr))
        print("mem-reserved:",max_memory_reserved())
        print("mem-allocated:",max_memory_allocated())

Esempio n. 12

0

Mostra file

    def train(self, num_of_iters=1, data=None, hidden=None):
        self.loss = 0.0
        s = time.time()

        for i in range(num_of_iters):
            self.adjust_learning_rate(self.train_epoch, self.optimizer)
            if self.train_iter % self.num_batches_per_epoch == 0 and self.train_iter > 0:
                logger.info('train iter: %d, num_batches_per_epoch: %d',
                            self.train_iter, self.num_batches_per_epoch)
                logger.info(
                    'Epoch %d, avg train acc: %f, lr: %f, avg loss: %f' %
                    (self.train_iter // self.num_batches_per_epoch,
                     np.mean(self.train_acc_top1), self.lr,
                     self.avg_loss_per_epoch / self.num_batches_per_epoch))
                mean_s = np.mean(self.sparsities)
                if self.train_iter > 0 and np.isnan(mean_s):
                    logger.warn('NaN detected! sparsities:  %s' %
                                self.sparsities)
                logger.info(
                    'Average Sparsity: %f, compression ratio: %f, communication size: %f',
                    np.mean(self.sparsities), np.mean(self.compression_ratios),
                    np.mean(self.communication_sizes))
                if self.rank == 0 and self.writer is not None:
                    self.writer.add_scalar(
                        'cross_entropy',
                        self.avg_loss_per_epoch / self.num_batches_per_epoch,
                        self.train_epoch)
                    self.writer.add_scalar('top-1 acc',
                                           np.mean(self.train_acc_top1),
                                           self.train_epoch)
                if self.rank == 0:
                    self.test(self.train_epoch)
                self.sparsities = []
                self.compression_ratios = []
                self.communication_sizes = []
                self.train_acc_top1 = []
                self.epochs_info.append(self.avg_loss_per_epoch /
                                        self.num_batches_per_epoch)
                self.avg_loss_per_epoch = 0.0
                if self.train_iter > 0 and self.rank == 0:
                    state = {
                        'iter': self.train_iter,
                        'epoch': self.train_epoch,
                        'state': self.get_model_state()
                    }
                    if self.prefix:
                        relative_path = './weights/%s/%s-n%d-bs%d-lr%.4f' % (
                            self.prefix, self.dnn, self.nworkers,
                            self.batch_size, self.base_lr)
                    else:
                        relative_path = './weights/%s-n%d-bs%d-lr%.4f' % (
                            self.dnn, self.nworkers, self.batch_size,
                            self.base_lr)
                    if settings.SPARSE:
                        relative_path += '-s%.5f' % self.sparsity
                    utils.create_path(relative_path)
                    filename = '%s-rank%d-epoch%d.pth' % (self.dnn, self.rank,
                                                          self.train_epoch)
                    fn = os.path.join(relative_path, filename)
                    #self.save_checkpoint(state, fn)
                    #self.remove_dict(state)
                self.train_epoch += 1
                if self.train_sampler and (self.nworkers > 1):
                    self.train_sampler.set_epoch(self.train_epoch)

            ss = time.time()
            if data is None:
                data = self.data_iter()

            if self.dataset == 'an4':
                inputs, labels_cpu, input_percentages, target_sizes = data
                input_sizes = input_percentages.mul_(int(inputs.size(3))).int()
            else:
                inputs, labels_cpu = data
            if self.is_cuda:
                if self.dnn == 'lstm':
                    inputs = Variable(inputs.transpose(0,
                                                       1).contiguous()).cuda()
                    labels = Variable(labels_cpu.transpose(
                        0, 1).contiguous()).cuda()
                else:
                    inputs, labels = inputs.cuda(
                        non_blocking=True), labels_cpu.cuda(non_blocking=True)
            else:
                labels = labels_cpu

            self.iotime += (time.time() - ss)

            if self.dnn == 'lstman4':
                out, output_sizes = self.net(inputs, input_sizes)
                out = out.transpose(0, 1)  # TxNxH
                loss = self.criterion(out, labels_cpu, output_sizes,
                                      target_sizes)
                loss = loss / inputs.size(0)  # average the loss by minibatch
                loss.backward()
            elif self.dnn == 'lstm':
                hidden = lstmpy.repackage_hidden(hidden)
                outputs, hidden = self.net(inputs, hidden)
                tt = torch.squeeze(
                    labels.view(-1, self.net.batch_size * self.net.num_steps))
                loss = self.criterion(outputs.view(-1, self.net.vocab_size),
                                      tt)
                loss.backward()
            else:
                # forward + backward + optimize
                outputs = self.net(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
            loss_value = loss.item()
            # logger.info statistics
            self.loss += loss_value

            self.avg_loss_per_epoch += loss_value

            if self.dnn not in ['lstm', 'lstman4']:
                acc1, = self.cal_accuracy(outputs, labels, topk=(1, ))
                self.train_acc_top1.append(acc1)

            self.train_iter += 1
        self.num_of_updates_during_comm += 1
        self.loss /= num_of_iters
        self.timer += time.time() - s
        display = 100
        if self.train_iter % display == 0:
            logger.info(
                '[%3d][%5d/%5d][rank:%d] loss: %.3f, average forward and backward time: %f, iotime: %f '
                % (self.train_epoch, self.train_iter,
                   self.num_batches_per_epoch, self.rank, self.loss,
                   self.timer / display, self.iotime / display))
            mbytes = 1024. * 1024
            logger.info(
                'GPU memory usage memory_allocated: %d MBytes, max_memory_allocated: %d MBytes, memory_cached: %d MBytes, max_memory_cached: %d MBytes, CPU memory usage: %d MBytes',
                ct.memory_allocated() / mbytes,
                ct.max_memory_allocated() / mbytes,
                ct.memory_cached() / mbytes,
                ct.max_memory_cached() / mbytes,
                process.memory_info().rss / mbytes)
            self.timer = 0.0
            self.iotime = 0.0
            if self.is_cuda:
                torch.cuda.empty_cache()

        if self.dnn == 'lstm':
            return num_of_iters, hidden
        return num_of_iters

Esempio n. 13

0

Mostra file

File: detecter_rt.py Progetto: shyhuai/FADNet

def detect(opt):

    net_name = opt.net
    model = opt.model
    result_path = opt.rp
    file_list = opt.filelist
    filepath = opt.filepath

    if not os.path.exists(result_path):
        os.makedirs(result_path)

    devices = [int(item) for item in opt.devices.split(',')]
    ngpu = len(devices)

    # build net according to the net name
    if net_name == "psmnet" or net_name == "ganet":
        net = build_net(net_name)(192)
    elif net_name in ["fadnet", "dispnetc", "mobilefadnet", "slightfadnet"]:
        net = build_net(net_name)(batchNorm=False, lastRelu=True)
    #elif net_name in ["mobilefadnet", "slightfadnet"]:
    #    #B, max_disp, H, W = (wopt.batchSize, 40, 72, 120)
    #    shape = (opt.batchSize, 40, 72, 120) #TODO: Should consider how to dynamically use
    #    warp_size = (opt.batchSize, 3, 576, 960)
    #    net = build_net(net_name)(batchNorm=False, lastRelu=True, input_img_shape=shape, warp_size=warp_size)

    if ngpu > 1:
        net = torch.nn.DataParallel(net, device_ids=devices)

    model_data = torch.load(model)
    print(model_data.keys())
    if 'state_dict' in model_data.keys():
        #net.load_state_dict(model_data['state_dict'])
        load_model_trained_with_DP(net, model_data['state_dict'])
    else:
        net.load_state_dict(model_data)

    num_of_parameters = count_parameters(net)
    print('Model: %s, # of parameters: %d' % (net_name, num_of_parameters))

    batch_size = int(opt.batchSize)
    test_dataset = StereoDataset(txt_file=file_list,
                                 root_dir=filepath,
                                 phase='detect')
    test_loader = DataLoader(test_dataset, batch_size = batch_size, \
                        shuffle = False, num_workers = 1, \
                        pin_memory = True)

    net.eval()
    #net.dispnetc.eval()
    #net.dispnetres.eval()
    net = net.cuda()

    #for i, sample_batched in enumerate(test_loader):
    #    input = torch.cat((sample_batched['img_left'], sample_batched['img_right']), 1)
    #    num_of_samples = input.size(0)
    #    input = input.cuda()
    #    x = input
    #    break

    net_trt = trt_transform(net)

    torch.save(net_trt.state_dict(), 'models/mobilefadnet_trt.pth')

    s = time.time()

    avg_time = []
    display = 50
    warmup = 2
    for i, sample_batched in enumerate(test_loader):
        #if i > 215:
        #    break
        stime = time.time()

        input = torch.cat(
            (sample_batched['img_left'], sample_batched['img_right']), 1)
        print('input Shape: {}'.format(input.size()))
        num_of_samples = input.size(0)
        input = input.cuda()
        break

    iterations = 14 + warmup
    #iterations = len(test_loader) - warmup
    #for i, sample_batched in enumerate(test_loader):
    for i in range(iterations):
        stime = time.time()

        input = torch.cat(
            (sample_batched['img_left'], sample_batched['img_right']), 1)
        print('input Shape: {}'.format(input.size()))
        num_of_samples = input.size(0)
        input = input.cuda()

        input_var = input  #torch.autograd.Variable(input, volatile=True)
        iotime = time.time()
        print('[{}] IO time:{}'.format(i, iotime - stime))

        if i == warmup:
            ss = time.time()

        with torch.no_grad():
            if opt.net == "psmnet" or opt.net == "ganet":
                output = net_trt(input_var)
                output = output.unsqueeze(1)
            elif opt.net == "dispnetc":
                output = net_trt(input_var)[0]
            else:
                output = net_trt(input_var)[-1]
        itime = time.time()
        print('[{}] Inference time:{}'.format(i, itime - iotime))

        if i > warmup:
            avg_time.append((time.time() - ss))
            if (i - warmup) % display == 0:
                print('Average inference time: %f' % np.mean(avg_time))
                mbytes = 1024. * 1024
                print('GPU memory usage memory_allocated: %d MBytes, max_memory_allocated: %d MBytes, memory_cached: %d MBytes, max_memory_cached: %d MBytes, CPU memory usage: %d MBytes' %  \
                    (ct.memory_allocated()/mbytes, ct.max_memory_allocated()/mbytes, ct.memory_cached()/mbytes, ct.max_memory_cached()/mbytes, process.memory_info().rss/mbytes))
                avg_time = []

        print('[%d] output shape:' % i, output.size())
        #output = scale_disp(output, (output.size()[0], 540, 960))
        #disp = output[:, 0, :, :]
        ptime = time.time()
        print('[{}] Post-processing time:{}'.format(i, ptime - itime))

        #for j in range(num_of_samples):

        #    name_items = sample_batched['img_names'][0][j].split('/')
        #    # write disparity to file
        #    output_disp = disp[j]
        #    np_disp = disp[j].float().cpu().numpy()

        #    print('Batch[{}]: {}, average disp: {}({}-{}).'.format(i, j, np.mean(np_disp), np.min(np_disp), np.max(np_disp)))
        #    save_name = '_'.join(name_items).replace(".png", "_d.png")# for girl02 dataset
        #    print('Name: {}'.format(save_name))
        #    skimage.io.imsave(os.path.join(result_path, save_name),(np_disp*256).astype('uint16'))
        print('Current batch time used:: {}'.format(time.time() - stime))

        #save_name = '_'.join(name_items).replace("png", "pfm")# for girl02 dataset
        #print('Name: {}'.format(save_name))
        #np_disp = np.flip(np_disp, axis=0)
        #save_pfm('{}/{}'.format(result_path, save_name), np_disp)

    print('Evaluation time used: {}, avg iter: {}'.format(
        time.time() - ss, (time.time() - ss) / iterations))

Esempio n. 14

0

Mostra file

File: Daily1022_pytorch（二十二）：torch.cuda：CUDA第二部分.py Progetto: rinetd/TorchDaily

# - torch.cuda.get_device_capability(device): 返回设备的cuda能力

#%%
cuda.get_device_capability(0)

#%% [markdown]
# - torch.cuda.get_device_name(device):返回设备名称

#%%
cuda.get_device_name(0)

#%% [markdown]
# - torch.cuda.max_memory_allocated(device):返回指定设备张量的最大GPU内存用量

#%%
cuda.max_memory_allocated(0)

#%%
device = torch.device('cuda') if cuda.is_available() else torch.device('cpu')
X = torch.randn(100, 100, device=device)
X.shape

#%%
cuda.max_memory_allocated(0)

#%% [markdown]
# - torch.cuda.max_memory_cached(device=None):返回指定设备缓存分配器管理的最大GPU内存

#%%
cuda.max_memory_cached(0)