Ejemplo n.º 1
0
def build_flow(args, device):
    """Build the Glow flow network together with its training utilities.

    Args:
        args: Namespace-like object. This function reads ``num_channels``,
            ``num_levels``, ``num_steps``, ``gpu_ids``, ``benchmark``,
            ``resume_flow``, ``lr`` and ``warm_up`` from it.
        device: Target device. The ``DataParallel`` branch is only taken when
            this compares equal to the string ``'cuda'``.

    Returns:
        The tuple ``(flow_net, loss_fn, optimizer, scheduler, start_epoch,
        flow_best_loss)``. ``start_epoch`` and ``flow_best_loss`` are both 0
        unless a checkpoint was restored.

    Raises:
        AssertionError: If ``args.resume_flow`` is set but ``ckpts`` is not a
            directory.
    """
    # Model
    print('Building flow model..')
    flow_net = flow.Glow(num_channels=args.num_channels,
                         num_levels=args.num_levels,
                         num_steps=args.num_steps)
    flow_net = flow_net.to(device)
    if device == 'cuda':
        flow_net = torch.nn.DataParallel(flow_net, args.gpu_ids)
        cudnn.benchmark = args.benchmark

    # Defaults returned when no checkpoint is restored.
    # NOTE(review): 0 as the initial "best" loss only makes sense if the
    # caller treats it as a sentinel -- confirm against the caller.
    start_epoch = 0
    flow_best_loss = 0

    if args.resume_flow:
        # Load checkpoint.
        print('Resuming from checkpoint at ckpts/best.pth.tar...')
        assert os.path.isdir('ckpts'), 'Error: no checkpoint directory found!'
        # map_location remaps the stored tensors onto the device in use, so a
        # checkpoint written on a GPU can still be restored on a CPU-only host.
        checkpoint = torch.load('ckpts/best.pth.tar', map_location=device)
        flow_net.load_state_dict(checkpoint['net'])
        flow_best_loss = checkpoint['test_loss']
        start_epoch = checkpoint['epoch']

    loss_fn = util.NLLLoss().to(device)
    optimizer = optim.Adam(flow_net.parameters(), lr=args.lr)
    # Linear learning-rate warm-up: the multiplier grows with the scheduler
    # step s until it reaches 1 at s == args.warm_up.
    scheduler = sched.LambdaLR(optimizer, lambda s: min(1., s / args.warm_up))

    return flow_net, loss_fn, optimizer, scheduler, start_epoch, flow_best_loss
Ejemplo n.º 2
0
def main(args):
    """Train and evaluate a Glow model on the ``data/celeba_sample`` file lists.

    Args:
        args: Namespace-like object. This function reads ``gpu_ids``,
            ``batch_size``, ``seed``, ``mode``, ``num_workers``,
            ``num_channels``, ``num_levels``, ``num_steps``, ``benchmark``,
            ``resume``, ``lr``, ``warm_up``, ``num_epochs`` and
            ``max_grad_norm``. ``args.batch_size`` is multiplied in place by
            ``max(1, len(args.gpu_ids))``.

    Raises:
        AssertionError: If ``args.resume`` is set but ``ckpts`` is not a
            directory.
    """
    # Module-level training state; it is only written here when resuming.
    global best_loss
    global global_step

    # Set up main device and scale batch size
    device = 'cuda' if torch.cuda.is_available() and args.gpu_ids else 'cpu'
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    trainset = ImgDatasets(root_dir='data/celeba_sample',
                           files='train_files.txt',
                           mode=args.mode)
    trainloader = data.DataLoader(trainset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers)

    testset = ImgDatasets(root_dir='data/celeba_sample',
                          files='test_files.txt',
                          mode=args.mode)
    testloader = data.DataLoader(testset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers)

    # Model
    print('Building model..')
    net = Glow(num_channels=args.num_channels,
               num_levels=args.num_levels,
               num_steps=args.num_steps,
               mode=args.mode)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net, args.gpu_ids)
        cudnn.benchmark = args.benchmark

    start_epoch = 0
    if args.resume:
        # Load checkpoint.
        print('Resuming from checkpoint at ckpts/best.pth.tar...')
        assert os.path.isdir('ckpts'), 'Error: no checkpoint directory found!'
        # map_location remaps the stored tensors onto the device in use, so a
        # checkpoint written on a GPU can still be restored on a CPU-only host.
        checkpoint = torch.load('ckpts/best.pth.tar', map_location=device)
        net.load_state_dict(checkpoint['net'])
        best_loss = checkpoint['test_loss']
        start_epoch = checkpoint['epoch']
        # The step counter is rebuilt as (epochs done) * (training-set size).
        global_step = start_epoch * len(trainset)

    loss_fn = util.NLLLoss().to(device)
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # Linear learning-rate warm-up capped at a multiplier of 1.
    scheduler = sched.LambdaLR(optimizer, lambda s: min(1., s / args.warm_up))

    for epoch in range(start_epoch, start_epoch + args.num_epochs):
        train(epoch, net, trainloader, device, optimizer, scheduler, loss_fn,
              args.max_grad_norm)
        test(epoch, net, testloader, device, loss_fn, args.mode)
Ejemplo n.º 3
0
def main(args):
    """Train and evaluate a Glow model on CIFAR-10.

    Args:
        args: Namespace-like object. This function reads ``gpu_ids``,
            ``batch_size``, ``seed``, ``num_workers``, ``num_channels``,
            ``num_levels``, ``num_steps``, ``benchmark``, ``resume``, ``lr``,
            ``warm_up``, ``num_epochs``, ``max_grad_norm`` and
            ``num_samples``. ``args.batch_size`` is multiplied in place by
            ``max(1, len(args.gpu_ids))``.

    Raises:
        AssertionError: If ``args.resume`` is set but ``ckpts`` is not a
            directory.
    """
    # Module-level training state; it is only written here when resuming.
    global best_loss
    global global_step

    # Set up main device and scale batch size
    device = 'cuda' if torch.cuda.is_available() and args.gpu_ids else 'cpu'
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # No normalization applied, since Glow expects inputs in (0, 1)
    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor()
    ])

    trainset = torchvision.datasets.CIFAR10(root='data', train=True,
                                            download=True,
                                            transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.batch_size,
                                  shuffle=True, num_workers=args.num_workers)

    testset = torchvision.datasets.CIFAR10(root='data', train=False,
                                           download=True,
                                           transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.batch_size,
                                 shuffle=False, num_workers=args.num_workers)

    # Model
    print('Building model..')
    net = Glow(num_channels=args.num_channels,
               num_levels=args.num_levels,
               num_steps=args.num_steps)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net, args.gpu_ids)
        cudnn.benchmark = args.benchmark

    start_epoch = 0
    if args.resume:
        # Load checkpoint.
        print('Resuming from checkpoint at ckpts/best.pth.tar...')
        assert os.path.isdir('ckpts'), 'Error: no checkpoint directory found!'
        # map_location remaps the stored tensors onto the device in use, so a
        # checkpoint written on a GPU can still be restored on a CPU-only host.
        checkpoint = torch.load('ckpts/best.pth.tar', map_location=device)
        net.load_state_dict(checkpoint['net'])
        best_loss = checkpoint['test_loss']
        start_epoch = checkpoint['epoch']
        # The step counter is rebuilt as (epochs done) * (training-set size).
        global_step = start_epoch * len(trainset)

    loss_fn = util.NLLLoss().to(device)
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # Linear learning-rate warm-up capped at a multiplier of 1.
    scheduler = sched.LambdaLR(optimizer, lambda s: min(1., s / args.warm_up))

    for epoch in range(start_epoch, start_epoch + args.num_epochs):
        train(epoch, net, trainloader, device, optimizer, scheduler,
              loss_fn, args.max_grad_norm)
        test(epoch, net, testloader, device, loss_fn, args.num_samples)
Ejemplo n.º 4
0
def main(args):
    """Train a Glow model on the ICLEVR training split with wandb logging.

    Multi-GPU wrapping and checkpoint resuming are not performed by this
    variant. The test dataset is constructed and printed but is not passed on
    to ``train``.

    Args:
        args: Namespace-like object. This function reads ``seed``,
            ``batch_size``, ``num_workers``, ``num_channels``, ``num_levels``,
            ``num_steps``, ``lr``, ``warm_up``, ``num_epochs`` and
            ``max_grad_norm``.
    """
    # Start experiment tracking, then pick the device.
    wandb.init(project='dlp-lab7-task1-nf')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Set random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    trainset = ICLEVRLoader(mode="train")
    print('trainset: ', trainset)

    # NOTE(review): machine-specific absolute path; consider moving it to a
    # command-line argument.
    dataset_dir = '/home/arg/courses/machine_learning/homework/deep_learning_and_practice/Lab7/dataset/task_1'
    # The image directory lives directly below the dataset directory.
    dataset_img_dir = os.path.join(dataset_dir, 'images')
    testset = Lab7_Dataset(img_path=dataset_img_dir,
                           json_path=os.path.join(dataset_dir, 'test.json'))
    print('testset: ', testset)

    trainloader = data.DataLoader(trainset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers)

    # Model
    print('Building model..')
    net = Glow(num_channels=args.num_channels,
               num_levels=args.num_levels,
               num_steps=args.num_steps)
    net = net.to(device)
    wandb.watch(net)

    loss_fn = util.NLLLoss().to(device)
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # Linear learning-rate warm-up capped at a multiplier of 1.
    scheduler = sched.LambdaLR(optimizer, lambda s: min(1., s / args.warm_up))

    # Unlike the per-epoch variants, train() receives the total epoch count.
    train(args.num_epochs, net, trainloader, device, optimizer, scheduler,
          loss_fn, args.max_grad_norm)
Ejemplo n.º 5
0
def main(args):
    """Train a conditional Glow model and report the best evaluator score.

    After every training epoch the model is scored by ``test`` on the fixed
    test conditions; at the end the epoch with the highest score is printed.

    Args:
        args: Namespace-like object. This function reads ``gpu_ids``,
            ``batch_size``, ``seed``, ``num_workers``, ``num_channels``,
            ``num_levels``, ``num_steps``, ``mode``, ``lr``, ``warm_up``,
            ``resume``, ``num_epochs`` and ``max_grad_norm``.
            ``args.batch_size`` is multiplied in place by
            ``max(1, len(args.gpu_ids))``.
    """
    # Module-level step counter; it is only written here when resuming.
    global global_step

    # Set up main device and scale batch size
    device = 'cuda' if torch.cuda.is_available() and args.gpu_ids else 'cpu'
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    trainloader = data.DataLoader(ICLEVRLoader('./'),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers)
    # Element [1] of the returned data is used as the conditioning input.
    test_condition = get_iCLEVR_data('./', 'test')[1]
    test_condition = torch.Tensor(test_condition).float()
    test_condition = test_condition.to(device)

    # Model
    print('Building model..')
    net = Glow(num_channels=args.num_channels,
               num_levels=args.num_levels,
               num_steps=args.num_steps,
               img_shape=(3, 64, 64),
               mode=args.mode)
    net = net.to(device)
    evaluator = evaluation_model()

    loss_fn = util.NLLLoss().to(device)
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # Linear learning-rate warm-up capped at a multiplier of 1.
    scheduler = sched.LambdaLR(optimizer, lambda s: min(1., s / args.warm_up))
    start_epoch = 0

    if args.resume:
        # Load checkpoint.
        print('Resuming from checkpoint')
        # map_location remaps the stored tensors onto the device in use, so a
        # checkpoint written on a GPU can still be restored on a CPU-only host.
        checkpoint = torch.load('savemodel/cINN/checkpoint_18.tar',
                                map_location=device)
        net.load_state_dict(checkpoint['net'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # The checkpoint's test loss is not restored by this variant.
        start_epoch = checkpoint['epoch']
        global_step = start_epoch * len(trainloader.dataset)

    score_list = []

    for epoch in range(start_epoch, start_epoch + args.num_epochs):
        train(epoch, net, trainloader, device, optimizer, scheduler,
              loss_fn, args.max_grad_norm)
        score = test(epoch, net, test_condition, device, evaluator)
        score_list.append(score)

    if not score_list:
        # np.argmax raises on an empty sequence; nothing was evaluated.
        print('No epochs were run; no score to report.')
        return

    score_list = np.asarray(score_list)
    # argmax is relative to this run, so offset it by the first epoch to
    # report the real epoch number when training was resumed.
    best_epoch = start_epoch + int(np.argmax(score_list))
    print('Best epoch: %d\nBest score: %f' % (best_epoch, np.max(score_list)))
Ejemplo n.º 6
0
def eval(model, embedder, test_loader, opt, writer, device=None):
    """Compute the average bits-per-dimension of ``model`` over ``test_loader``.

    The model is switched to evaluation mode and is left in that mode on
    return.

    Args:
        model: Flow model; called as
            ``model.forward(imgs, condition_embd, reverse=False)`` and expected
            to return a ``(z, sldj)`` pair.
        embedder: Callable ``embedder(labels, captions)`` producing the
            conditioning embedding. Not called when ``opt.conditioning`` is
            ``'unconditional'``.
        test_loader: Iterable yielding ``(imgs, labels, captions)`` batches.
        opt: Options object; only ``opt.conditioning`` is read.
        writer: Unused by this function; kept for interface compatibility.
        device: Device the images, labels and loss module are moved to.

    Returns:
        The sum of the per-batch bits-per-dimension values divided by
        ``len(test_loader)``.
    """
    print("EVALUATING ON VAL")
    model = model.eval()
    bpd = 0.0
    loss_fn = util.NLLLoss().to(device)
    # Wrap the loader itself rather than enumerate(...): tqdm can then read
    # the loader's length and display a total and an ETA.
    for imgs, labels, captions in tqdm(test_loader):
        imgs = imgs.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            if opt.conditioning == 'unconditional':
                condition_embd = None
            else:
                condition_embd = embedder(labels, captions)

            z, sldj = model.forward(imgs, condition_embd, reverse=False)
            # Normalise the loss by the number of elements per image
            # (product of every dimension except the batch dimension).
            loss = loss_fn(z, sldj) / np.prod(imgs.size()[1:])

            # Dividing by ln(2) converts nats to bits.
            bpd += loss / np.log(2)
    bpd /= len(test_loader)
    print("VAL bpd : {}".format(bpd))
    return bpd
Ejemplo n.º 7
0
def main(args, train):
    """Train a Flow++ model on CIFAR-10 using the supplied ``train`` routine.

    Args:
        args: Namespace-like object. This function reads ``gpu_ids``,
            ``batch_size``, ``seed``, ``num_workers``, ``num_channels``,
            ``num_blocks``, ``num_dequant_blocks``, ``num_components``,
            ``use_attn``, ``drop_prob``, ``benchmark``, ``resume``,
            ``weight_decay``, ``lr``, ``warm_up``, ``num_epochs`` and
            ``max_grad_norm``. ``args.batch_size`` is multiplied in place by
            ``max(1, len(args.gpu_ids))``.
        train: Callable invoked once per epoch as ``train(epoch, net,
            trainloader, device, optimizer, loss_fn, max_grad_norm, args,
            scheduler)``.

    Raises:
        AssertionError: If ``args.resume`` is set but ``save`` is not a
            directory.
    """
    # Module-level training state; it is only written here when resuming.
    global best_loss
    global global_step

    # Set up main device and scale batch size
    device = 'cuda' if torch.cuda.is_available() and args.gpu_ids else 'cpu'
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # No normalization applied, since model expects inputs in (0, 1)
    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor()
    ])

    trainset = torchvision.datasets.CIFAR10(root='data', train=True,
                                            download=True,
                                            transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.batch_size,
                                  shuffle=True, num_workers=args.num_workers)

    # The test split is downloaded and wrapped in a loader, but this function
    # never passes it on to anything.
    testset = torchvision.datasets.CIFAR10(root='data', train=False,
                                           download=True,
                                           transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.batch_size,
                                 shuffle=False, num_workers=args.num_workers)

    # Model
    print('Building model..')
    net = FlowPlusPlus(scales=[(0, 4), (2, 3)],
                       in_shape=(3, 32, 32),
                       mid_channels=args.num_channels,
                       num_blocks=args.num_blocks,
                       num_dequant_blocks=args.num_dequant_blocks,
                       num_components=args.num_components,
                       use_attn=args.use_attn,
                       drop_prob=args.drop_prob)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net, args.gpu_ids)
        cudnn.benchmark = args.benchmark

    start_epoch = 0
    if args.resume:
        # Load checkpoint.
        print('Resuming from checkpoint at save/best.pth.tar...')
        assert os.path.isdir('save'), 'Error: no checkpoint directory found!'
        # map_location remaps the stored tensors onto the device in use, so a
        # checkpoint written on a GPU can still be restored on a CPU-only host.
        checkpoint = torch.load('save/best.pth.tar', map_location=device)
        net.load_state_dict(checkpoint['net'])
        best_loss = checkpoint['test_loss']
        start_epoch = checkpoint['epoch']
        # The step counter is rebuilt as (epochs done) * (training-set size).
        global_step = start_epoch * len(trainset)

    loss_fn = util.NLLLoss().to(device)
    param_groups = util.get_param_groups(net, args.weight_decay,
                                         norm_suffix='weight_g')
    optimizer = optim.Adam(param_groups, lr=args.lr)
    # The warm-up length is scaled by the (already GPU-scaled) batch size.
    warm_up = args.warm_up * args.batch_size
    scheduler = sched.LambdaLR(optimizer, lambda s: min(1., s / warm_up))

    for epoch in range(start_epoch, start_epoch + args.num_epochs):
        train(epoch, net, trainloader, device, optimizer, loss_fn,
              args.max_grad_norm, args, scheduler)
Ejemplo n.º 8
0
def main(args):
    """Train and evaluate a Flow++ model on CIFAR-10.

    Autograd anomaly detection is switched on globally for the whole run.

    Args:
        args: Namespace-like object. This function reads ``gpu_ids``,
            ``batch_size``, ``seed``, ``num_workers``, ``num_channels``,
            ``num_blocks``, ``num_dequant_blocks``, ``num_components``,
            ``use_attn``, ``drop_prob``, ``benchmark``, ``resume``,
            ``weight_decay``, ``lr``, ``warm_up``, ``num_epochs``,
            ``max_grad_norm``, ``num_samples`` and ``save_dir``.
            ``args.batch_size`` is multiplied in place by
            ``max(1, len(args.gpu_ids))``.

    Raises:
        AssertionError: If ``args.resume`` is set but ``save`` is not a
            directory.
    """
    # Module-level training state; it is only written here when resuming.
    global best_loss
    global global_step

    # Set up main device and scale batch size
    device = 'cuda' if torch.cuda.is_available() and args.gpu_ids else 'cpu'
    args.batch_size *= max(1, len(args.gpu_ids))
    # NOTE(review): anomaly detection is a debugging aid and is left enabled
    # for every run -- confirm it is still wanted.
    torch.autograd.set_detect_anomaly(True)

    # Set random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # No normalization applied, since model expects inputs in (0, 1).
    # Random horizontal flipping is not applied in this variant.
    transform_train = transforms.Compose([
        transforms.ToTensor()
    ])

    transform_test = transforms.Compose([transforms.ToTensor()])

    trainset = torchvision.datasets.CIFAR10(root='data',
                                            train=True,
                                            download=True,
                                            transform=transform_train)
    trainloader = data.DataLoader(trainset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers)

    testset = torchvision.datasets.CIFAR10(root='data',
                                           train=False,
                                           download=True,
                                           transform=transform_test)
    testloader = data.DataLoader(testset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers)

    # Model
    print('Building model..')
    # NOTE(review): in_shape declares a single channel while the CIFAR-10
    # loaders above are built with ToTensor() only -- confirm the model is
    # meant to receive these images unchanged.
    net = FlowPlusPlus(scales=[(0, 4), (2, 3)],
                       in_shape=(1, 32, 32),
                       mid_channels=args.num_channels,
                       num_blocks=args.num_blocks,
                       num_dequant_blocks=args.num_dequant_blocks,
                       num_components=args.num_components,
                       use_attn=args.use_attn,
                       drop_prob=args.drop_prob)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net, args.gpu_ids)
        cudnn.benchmark = args.benchmark

    start_epoch = 0
    if args.resume:
        # Load checkpoint.
        print('Resuming from checkpoint at save/best.pth.tar...')
        assert os.path.isdir('save'), 'Error: no checkpoint directory found!'
        # map_location remaps the stored tensors onto the device in use, so a
        # checkpoint written on a GPU can still be restored on a CPU-only host.
        checkpoint = torch.load('save/best.pth.tar', map_location=device)
        net.load_state_dict(checkpoint['net'])
        best_loss = checkpoint['test_loss']
        start_epoch = checkpoint['epoch']
        # The step counter is rebuilt as (epochs done) * (training-set size).
        global_step = start_epoch * len(trainset)

    loss_fn = util.NLLLoss().to(device)
    param_groups = util.get_param_groups(net,
                                         args.weight_decay,
                                         norm_suffix='weight_g')
    optimizer = optim.Adam(param_groups, lr=args.lr)
    # The warm-up length is scaled by the (already GPU-scaled) batch size.
    warm_up = args.warm_up * args.batch_size
    scheduler = sched.LambdaLR(optimizer, lambda s: min(1., s / warm_up))

    for epoch in range(start_epoch, start_epoch + args.num_epochs):
        train(epoch, net, trainloader, device, optimizer, scheduler, loss_fn,
              args.max_grad_norm)
        test(epoch, net, testloader, device, loss_fn, args.num_samples,
             args.save_dir)
Ejemplo n.º 9
0
def train(model,
          embedder,
          optimizer,
          scheduler,
          train_loader,
          val_loader,
          opt,
          writer,
          device=None):
    """Run the full training loop with periodic sampling and validation.

    For each of ``opt.n_epochs`` epochs: train on every batch of
    ``train_loader``, sample images every ``opt.sample_interval`` batches,
    evaluate on ``val_loader`` and save the model's state dict to
    ``<opt.output_dir>/models/epoch_<epoch>.pt``.

    Args:
        model: Flow model; called as
            ``model.forward(imgs, condition_embd, reverse=False)`` and expected
            to return a ``(z, sldj)`` pair.
        embedder: Callable ``embedder(labels, captions)`` producing the
            conditioning embedding. Not called when ``opt.conditioning`` is
            ``'unconditional'``.
        optimizer: Optimizer stepped after every batch.
        scheduler: Learning-rate scheduler, stepped with the module-level
            ``global_step`` after every batch.
        train_loader: Loader yielding ``(imgs, labels, captions)`` batches.
        val_loader: Loader used for sampling and for validation.
        opt: Options object; ``n_epochs``, ``conditioning``, ``max_grad_norm``,
            ``sample_interval`` and ``output_dir`` are read.
        writer: Object exposing ``add_scalar(tag, value, step)``.
        device: Device the batches and loss module are moved to.
    """
    print("TRAINING STARTS")
    global global_step
    for epoch in range(opt.n_epochs):
        print("[Epoch %d/%d]" % (epoch + 1, opt.n_epochs))
        model = model.train()
        loss_fn = util.NLLLoss().to(device)
        with tqdm(total=len(train_loader.dataset)) as progress_bar:
            for i, (imgs, labels, captions) in enumerate(train_loader):
                imgs = imgs.to(device)
                labels = labels.to(device)

                # The embedding is computed without tracking gradients, so
                # nothing is back-propagated into the embedder.
                with torch.no_grad():
                    if opt.conditioning == 'unconditional':
                        condition_embd = None
                    else:
                        condition_embd = embedder(labels, captions)

                optimizer.zero_grad()

                z, sldj = model.forward(imgs, condition_embd, reverse=False)
                # Normalise the loss by the number of elements per image.
                loss = loss_fn(z, sldj) / np.prod(imgs.size()[1:])
                loss.backward()
                if opt.max_grad_norm > 0:
                    util.clip_grad_norm(optimizer, opt.max_grad_norm)
                optimizer.step()
                scheduler.step(global_step)

                batches_done = epoch * len(train_loader) + i
                # Dividing by ln(2) converts nats to bits.
                batch_bpd = loss.item() / np.log(2)
                writer.add_scalar('train/bpd', batch_bpd, batches_done)
                progress_bar.set_postfix(bpd=batch_bpd,
                                         lr=optimizer.param_groups[0]['lr'])
                progress_bar.update(imgs.size(0))
                # global_step counts processed samples, not batches.
                global_step += imgs.size(0)

                if (batches_done + 1) % opt.sample_interval == 0:
                    print("sampling_images")
                    model = model.eval()
                    sample_image(model,
                                 embedder,
                                 opt.output_dir,
                                 n_row=4,
                                 batches_done=batches_done,
                                 dataloader=val_loader,
                                 device=device)
                    # Sampling ran in eval mode; switch back so the remaining
                    # batches of this epoch are trained in training mode.
                    model = model.train()

        val_bpd = eval(model, embedder, val_loader, opt, writer, device=device)
        writer.add_scalar("val/bpd", val_bpd, (epoch + 1) * len(train_loader))

        # Make sure the checkpoint directory exists before writing to it.
        models_dir = os.path.join(opt.output_dir, 'models')
        os.makedirs(models_dir, exist_ok=True)
        torch.save(model.state_dict(),
                   os.path.join(models_dir, 'epoch_{}.pt'.format(epoch)))
Ejemplo n.º 10
0
        net.load_state_dict({
            k.replace('module.', ''): v
            for k, v in torch.load("ckpts/-1.pth.tar")['net'].items()
        })
    net.eval()
    #testset = dataset(-2, transform, test=True,rotation_data=True)
    testset = torchvision.datasets.CIFAR10(root='dataset/cifar10-torchvision',
                                           train=False,
                                           download=True,
                                           transform=transform)
    #testset = imagenet_val(transform)
    testloader = data.DataLoader(testset,
                                 batch_size=64,
                                 shuffle=False,
                                 num_workers=8)
    loss_fn = util.NLLLoss().to(device)
    loss_meter = util.AverageMeter()
    bpd_sum = 0
    n = 0
    for x, _ in testloader:
        #x = x.to(device)
        z, sldj = net(x, reverse=False)
        loss = loss_fn(z, sldj)
        loss_meter.update(loss.item(), x.size(0))
        n += 1
        bpd_sum += util.bits_per_dim(x, loss_meter.avg)
        #print(util.bits_per_dim(x, loss_meter.avg))
        #print(bpd_sum/n)
    print(bpd_sum / n)

for i in range(3):