Example 1
def test_lookahead_pred2():
    net = MockNet()

    input = Variable(torch.randn((5, 3)))

    # Random weights, zero biases
    net.fc.weight.data.normal_(0.0, 1.0)
    net.fc.bias.data.fill_(0.0)

    pred = PredOpt(net.parameters())

    result1 = net(input)

    # No step() yet => the same result
    with pred.lookahead(2.0):
        result2 = net(input)
    assert (np.all(np.isclose(result1.data.numpy(), result2.data.numpy())))
Example 2
def test_lookahead_pred():
    net = MockNet()

    input = Variable(torch.randn((5, 3)))

    # Zero-weights, zero-biases
    net.fc.weight.data.fill_(0.0)
    net.fc.bias.data.fill_(0.0)

    pred = PredOpt(net.parameters())

    # Update weights
    net.fc.weight.data.normal_(0, 1.0)
    pred.step()

    result1 = net(input)

    # Lookahead 0.0 => the same results
    with pred.lookahead(0.0):
        result2 = net(input)
    assert (np.all(np.isclose(result1.data.numpy(), result2.data.numpy())))

    # Lookahead 1.0 => doubled results
    with pred.lookahead(1.0):
        result3 = net(input)
    assert (np.all(
        np.isclose((2.0 * result1).data.numpy(), result3.data.numpy())))

    # Outside of 'with' statements => the same results
    result4 = net(input)
    assert (np.all(np.isclose(result1.data.numpy(), result4.data.numpy())))
Example 3
def test_param_update():
    net = MockNet()

    net.fc.weight.data.fill_(0.0)
    net.fc.bias.data.fill_(0.0)

    pred = PredOpt(net.parameters())

    # Update weights
    net.fc.weight.data.fill_(1.0)  # 0.0 => 1.0    (Increased by 1.0)
    net.fc.bias.data.fill_(0.5)  # 0.0 => 0.5    (Increased by 0.5)

    pred.step()

    with pred.lookahead(1.0):
        assert (net.fc.weight.data[0, 0] == 2.0)  # 1.0 + 1.0 * 1.0
        assert (net.fc.bias.data[0] == 1.0)  # 0.5 + 0.5 * 1.0

    assert (net.fc.weight.data[1, 1] == 1.0)  # Went back to the correct value (1.0)
    assert (net.fc.bias.data[1] == 0.5)  # Went back to the correct value (0.5)

    with pred.lookahead(5.0):
        assert (net.fc.weight.data[2, 2] == 6.0)  # 1.0 + 1.0 * 5.0
        assert (net.fc.bias.data[2] == 3.0)  # 0.5 + 0.5 * 5.0
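
The three tests above pin down the contract of PredOpt: a lookahead before any step() is a no-op, a lookahead of s temporarily replaces each parameter theta with theta + s * (theta - theta_prev), and leaving the with-block restores the true parameters. The following is a minimal sketch of that contract, not the library's actual implementation (the name SimplePredOpt is made up here):

import torch
import torch.nn as nn
from contextlib import contextmanager


class SimplePredOpt:
    """Hypothetical stand-in for PredOpt: extrapolates each parameter
    along its most recent update, theta + step * (theta - theta_prev)."""

    def __init__(self, params):
        self.params = list(params)
        # Until step() is called, the recorded change is zero, so a
        # lookahead of any size leaves the parameters untouched.
        self._prev = [p.data.clone() for p in self.params]
        self._diff = [torch.zeros_like(p.data) for p in self.params]

    def step(self):
        # Record the most recent parameter change and the new values.
        for p, prev, diff in zip(self.params, self._prev, self._diff):
            diff.copy_(p.data - prev)
            prev.copy_(p.data)

    @contextmanager
    def lookahead(self, step=1.0):
        backup = [p.data.clone() for p in self.params]
        for p, diff in zip(self.params, self._diff):
            p.data.add_(step * diff)  # predicted parameters
        try:
            yield
        finally:
            for p, b in zip(self.params, backup):
                p.data.copy_(b)  # restore the true parameters


# Reproduces the arithmetic of test_param_update (Example 3):
net = nn.Linear(3, 4)
net.weight.data.fill_(0.0)
net.bias.data.fill_(0.0)
pred = SimplePredOpt(net.parameters())
net.weight.data.fill_(1.0)
net.bias.data.fill_(0.5)
pred.step()
with pred.lookahead(5.0):
    assert net.weight.data[0, 0] == 6.0  # 1.0 + 1.0 * 5.0
    assert net.bias.data[0] == 3.0  # 0.5 + 0.5 * 5.0
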
Example 4
fake_label = 0

if opt.cuda:
    netD.cuda()
    netG.cuda()
    criterion.cuda()
    input, label = input.cuda(), label.cuda()
    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

fixed_noise = Variable(fixed_noise)

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

optimizer_predD = PredOpt(netD.parameters())
optimizer_predG = PredOpt(netG.parameters())

if opt.pred:
    print(
        'Prediction of D and G is enabled (see https://openreview.net/forum?id=Skj8Kag0Z&noteId=rkLymJTSf)'
    )
    lookahead_step = 1.0
else:
    lookahead_step = 0.0

for epoch in range(opt.niter):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
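
The snippet is cut off here. Judging from the train() method in Example 5 below, the D update presumably runs its fake pass under a predicted G, along these lines (netD, netG, criterion, label, noise, fake_label, and the optimizers are the objects set up above; this is a sketch of the pattern, not the file's actual continuation):

netD.zero_grad()
# ... forward/backward on the real batch ...

# Compute gradient of D w/ predicted G
with optimizer_predG.lookahead(step=lookahead_step):
    fake = netG(noise)
    label.fill_(fake_label)
    output = netD(fake.detach())
    errD_fake = criterion(output, label)
    errD_fake.backward()
    optimizerD.step()
    optimizer_predD.step()  # record D's new parameters for its own prediction
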
Example 5
class DCGAN():
    def __init__(self, opt, nc):
        self.opt = opt

        ################################################################
        # Initializing Generator and Discriminator Networks
        ################################################################
        self.nz = int(opt.nz)
        ngf = int(opt.ngf)
        ndf = int(opt.ndf)
        self.device = 'cuda' if opt.cuda else 'cpu'

        self.G = _netG(opt.ngpu, self.nz, ngf, nc).to(self.device)
        self.G.apply(weights_init)

        if opt.netG != '':
            self.G.load_state_dict(torch.load(opt.netG))

        self.D = _netD(opt.ngpu, ndf, nc).to(self.device)
        self.D.apply(weights_init)

        if opt.netD != '':
            self.D.load_state_dict(torch.load(opt.netD))

        if opt.verbose and (not self.opt.distributed or dist.get_rank() == 0):
            print(self.G)
            print(self.D)

        ################################################################
        # Initialize Loss Function
        ################################################################
        self.criterion = nn.BCELoss().to(self.device)

        ################################################################
        # Set Prediction Enabled Adam Optimizer settings
        ################################################################
        # self.optimizerD = AdamPre(self.D.parameters(), lr=opt.lr/opt.DLRatio,
        #                           betas=(opt.beta1, 0.999), name='optD')
        # self.optimizerG = AdamPre(self.G.parameters(), lr=opt.lr/opt.GLRatio,
        #                           betas=(opt.beta1, 0.999), name='optG')
        self.optimizerD = optim.Adam(self.D.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))
        self.optimizerG = optim.Adam(self.G.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))
        self.optimizer_predD = PredOpt(self.D.parameters())
        self.optimizer_predG = PredOpt(self.G.parameters())

        ################################################################
        # Handle special Distributed training modes
        ################################################################
        self.verbose = opt.verbose
        if opt.distributed:
            if opt.cuda:
                self.D = torch.nn.parallel.DistributedDataParallel(self.D)
                self.G = torch.nn.parallel.DistributedDataParallel(self.G)
                self.verbose = opt.verbose and dist.get_rank() == 0
            else:
                self.D = torch.nn.parallel.DistributedDataParallelCPU(self.D)
                self.G = torch.nn.parallel.DistributedDataParallelCPU(self.G)
                self.verbose = opt.verbose and dist.get_rank() == 0

    def train(self,
              niter,
              dataset,
              lookahead_step=1.0,
              plotLoss=False,
              n_batches_viz=1):
        """
        Custom DCGAN training function using prediction steps
        """

        real_label = 1
        fake_label = 0
        fs = []

        for epoch in range(niter):
            for i, data in enumerate(dataset):
                if self.verbose:
                    c1 = time.perf_counter()
                ############################
                # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
                ###########################

                self.D.zero_grad()

                # train on real first
                real_cpu, _ = data
                b_size = real_cpu.size(0)
                input = real_cpu.to(self.device)
                label = torch.full((b_size, ), real_label, device=self.device)

                output = self.D(input)
                errD_real = self.criterion(output, label)
                errD_real.backward()
                D_x = output.data.mean()

                # train with fake
                noise = torch.randn(b_size, self.nz, 1, 1, device=self.device)

                # Compute gradient of D w/ predicted G
                with self.optimizer_predG.lookahead(step=lookahead_step):
                    fake = self.G(noise)
                    label.fill_(fake_label)
                    output = self.D(fake.detach())
                    errD_fake = self.criterion(output, label)
                    errD_fake.backward()
                    D_G_z1 = output.data.mean()
                    errD = errD_real + errD_fake
                    self.optimizerD.step()
                    self.optimizer_predD.step()

                ############################
                # (2) Update G network: maximize log(D(G(z)))
                ###########################
                self.G.zero_grad()
                label.fill_(real_label)

                # Compute gradient of G w/ predicted D
                with self.optimizer_predD.lookahead(step=lookahead_step):
                    fake = self.G(noise)
                    output = self.D(fake)
                    errG = self.criterion(output, label)
                    errG.backward()
                    D_G_z2 = output.data.mean()
                    self.optimizerG.step()
                    self.optimizer_predG.step()

                if plotLoss:
                    f = [errD.item(), errG.item()]
                    fs.append(f)

                if self.verbose:
                    print('[%d/%d][%d/%d] Loss_D:%.4f Loss_G:%.4f D(x)'
                          ': %.4f D(G(z)): %.4f / %.4f' %
                          (epoch, niter, i, len(dataset), errD.item(),
                           errG.item(), D_x, D_G_z1, D_G_z2))

                    print("itr=", epoch, "clock time elapsed=",
                          time.clock() - c1)
                # if i % self.opt.viz_every == 0 or epoch == niter - 1:
                #         iterViz(self.opt, i, self.G, self.fixed_noise)

            if self.verbose:
                # save checkpoints
                torch.save(
                    self.G.state_dict(),
                    '{0}/netG_epoch_{1}.pth'.format(self.opt.outf, epoch))
                torch.save(
                    self.D.state_dict(),
                    '{0}/netD_epoch_{1}.pth'.format(self.opt.outf, epoch))
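
For reference, a hedged usage sketch of this class. Every opt attribute below is one the constructor or train() actually reads, but the dataset, hyperparameter values, and the existence of an ./out directory are placeholder assumptions; _netG, _netD, weights_init, and PredOpt come from the surrounding repository, and the snippet assumes the pre-1.0 PyTorch this code targets:

from argparse import Namespace

import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

opt = Namespace(nz=100, ngf=64, ndf=64, ngpu=1, cuda=False,
                netG='', netD='', lr=2e-4, beta1=0.5,
                verbose=True, distributed=False, outf='./out')

# CIFAR-10 as a stand-in 3-channel dataset (nc=3)
dataset = dset.CIFAR10(root='./data', download=True,
                       transform=transforms.Compose([
                           transforms.Resize(64),
                           transforms.ToTensor(),
                           transforms.Normalize((0.5,) * 3, (0.5,) * 3),
                       ]))
loader = DataLoader(dataset, batch_size=64, shuffle=True)

gan = DCGAN(opt, nc=3)
gan.train(niter=25, dataset=loader, lookahead_step=1.0, plotLoss=True)
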
Example 6
if opt.cuda:
    netD.cuda()
    netG.cuda()
    criterion.cuda()
    input, label = input.cuda(), label.cuda()
    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

fixed_noise = Variable(fixed_noise)

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

if opt.pred:
    print('Prediction of G is enabled (see https://openreview.net/forum?id=Skj8Kag0Z&noteId=rkLymJTSf)')
    optimizer_pred = PredOpt(netG.parameters())


for epoch in range(opt.niter):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        # train with real
        netD.zero_grad()
        real_cpu, _ = data
        batch_size = real_cpu.size(0)
        if opt.cuda:
            real_cpu = real_cpu.cuda()
        input.resize_as_(real_cpu).copy_(real_cpu)
        label.resize_(batch_size).fill_(real_label)
Example 7
class DCGAN():
    def __init__(self, opt, verbose=False):
        self.opt = opt
        self.distributed = opt.distributed
        self.verbose = verbose
        self.cuda = opt.cuda
        self.local_rank = opt.local_rank
        device = 'cpu'
        if opt.cuda:
            device = 'cuda' + ':' + str(opt.local_rank)
        self.device = torch.device(device)

        ################################################################
        # Initializing Generator and Discriminator Networks
        ################################################################
        self.nz = int(opt.nz)
        ngf = int(opt.ngf)
        ndf = int(opt.ndf)
        nc = int(opt.nc)

        self.G = _netG(self.nz, ngf, nc).to(self.device)
        self.G.apply(weights_init)

        if opt.netG != '':
            self.G.load_state_dict(torch.load(opt.netG))
            self.G_losses = torch.load('{}/G_losses.pth'.format(self.opt.outf))

        self.D = _netD(ndf, nc).to(self.device)
        self.D.apply(weights_init)

        if opt.netD != '':
            self.D.load_state_dict(torch.load(opt.netD))
            self.D_losses = torch.load('{}/D_losses.pth'.format(self.opt.outf))

        if self.verbose:
            print(self.G)
            print(self.D)

        ################################################################
        # Initialize Loss Function
        ################################################################
        self.criterion = nn.BCELoss()
        if opt.cuda:
            self.criterion.cuda(opt.local_rank)

        ################################################################
        # Set Prediction Enabled Adam Optimizer settings
        ################################################################
        self.optimizerD = optim.Adam(self.D.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))
        self.optimizerG = optim.Adam(self.G.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))
        self.optimizer_predD = PredOpt(self.D.parameters())
        self.optimizer_predG = PredOpt(self.G.parameters())

        ################################################################
        # Handle special Distributed training modes
        ################################################################
        if opt.distributed:
            if opt.cuda:
                ids = [
                    i for i in range(opt.ngpu * opt.local_rank, opt.ngpu +
                                     opt.local_rank * opt.ngpu)
                ]

                self.D = nn.parallel.DistributedDataParallel(
                    self.D, device_ids=ids).to(self.device)

                self.G = nn.parallel.DistributedDataParallel(
                    self.G, device_ids=ids).to(self.device)
            else:
                if opt.sync_every == 1:
                    self.D = nn.parallel.DistributedDataParallelCPU(self.D)
                    self.G = nn.parallel.DistributedDataParallelCPU(self.G)
                else:
                    self.D = myd.DistributedDataParallelCPU(self.D)
                    self.G = myd.DistributedDataParallelCPU(self.G)
        else:
            if opt.cuda:
                # torch.cuda.set_device(opt.local_rank)
                if torch.cuda.device_count() > 1:
                    self.D = nn.parallel.DataParallel(self.D).to(self.device)
                    self.G = nn.parallel.DataParallel(self.G).to(self.device)

    def checkpoint(self, epoch):
        torch.save(self.G.state_dict(),
                   '{0}/netG_epoch_{1}.pth'.format(self.opt.outf, epoch))
        torch.save(self.D.state_dict(),
                   '{0}/netD_epoch_{1}.pth'.format(self.opt.outf, epoch))
        torch.save(self.G_losses, '{}/G_losses.pth'.format(self.opt.outf))
        torch.save(self.D_losses, '{}/D_losses.pth'.format(self.opt.outf))
        torch.save(self.Dxs, '{}/D_xs.pth'.format(self.opt.outf))
        torch.save(self.DGz1s, '{}/D_G_z1s.pth'.format(self.opt.outf))
        torch.save(self.DGz2s, '{}/D_G_z2s.pth'.format(self.opt.outf))

    def train(self,
              niter,
              dataset,
              gpred_step=1.0,
              dpred_step=0.0,
              sync_every=1,
              viz_every=10):
        # n_batches_viz=10):
        """
        Custom DCGAN training function using prediction steps
        """

        real_label = 1
        fake_label = 0
        self.D_losses = []
        self.G_losses = []
        self.Dxs = []
        self.DGz1s = []
        self.DGz2s = []
        img_list = []

        # fixed_noise = torch.randn(n_batches_viz, self.nz, 1, 1)
        # if self.cuda:
        #     fixed_noise.cuda(self.local_rank, non_blocking=True)

        itr = 0

        for epoch in range(niter):
            c0 = time.time()
            for i, data in enumerate(dataset):

                c1 = time.time()

                ############################
                # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
                ###########################

                self.D.zero_grad()

                # train on real first
                real_cpu, _ = data
                b_size = real_cpu.size(0)

                input = real_cpu
                if self.cuda:
                    # .cuda() is not in-place; keep the returned tensor
                    input = input.cuda(self.local_rank, non_blocking=True)

                label = torch.full((b_size, ), real_label, device=self.device)
                output = self.D(input)
                errD_real = self.criterion(output, label)
                errD_real.backward()
                D_x = output.data.mean()

                # train with fake
                noise = torch.randn(b_size, self.nz, 1, 1, device=self.device)

                # Compute gradient of D w/ predicted G
                with self.optimizer_predG.lookahead(step=gpred_step):
                    fake = self.G(noise)
                    label.fill_(fake_label)
                    output = self.D(fake.detach())
                    errD_fake = self.criterion(output, label)
                    errD_fake.backward()
                    D_G_z1 = output.data.mean()
                    errD = errD_real + errD_fake
                    self.optimizerD.step()
                    self.optimizer_predD.step()

                ############################
                # (2) Update G network: maximize log(D(G(z)))
                ###########################
                self.G.zero_grad()
                label.fill_(real_label)

                # Compute gradient of G w/ predicted D
                with self.optimizer_predD.lookahead(step=dpred_step):
                    fake = self.G(noise)
                    output = self.D(fake)
                    errG = self.criterion(output, label)
                    errG.backward()
                    D_G_z2 = output.data.mean()
                    self.optimizerG.step()
                    self.optimizer_predG.step()

                self.G_losses.append(errG.data)
                self.D_losses.append(errD.data)
                self.Dxs.append(D_x)
                self.DGz1s.append(D_G_z1)
                self.DGz2s.append(D_G_z2)

                sync_print = ''
                if sync_every != 1 and itr % sync_every == 0:
                    sync_print = '\t --  synced at iteration ' + str(itr)
                    self.D.sync_parameters()
                    self.G.sync_parameters()

                if self.verbose:
                    print('[{}/{}][{}/{}] {:0.2f} secs, Loss_D:{:0.4f} Loss_G:'
                          '{:0.4f} D(x): {:0.4f} D(G(z)): {:0.4f} / {:0.4f}{}'.
                          format(epoch, niter, i, len(dataset),
                                 time.time() - c1, errD.data, errG.data, D_x,
                                 D_G_z1, D_G_z2, sync_print))

                    if itr % viz_every == 0:
                        self.checkpoint(epoch)

                itr += 1

            if sync_every != 1:
                if self.verbose:
                    print('Synchronizing Parameters at epoch:', epoch)
                self.D.sync_parameters()
                self.G.sync_parameters()

            if self.verbose:
                self.checkpoint(epoch)

            if self.verbose:
                print("Finished epoch in {:0.2f} seconds".format(time.time() -
                                                                 c0))

        return (self.G_losses, self.D_losses, self.Dxs, self.DGz1s, self.DGz2s,
                img_list)
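
train() returns the recorded traces. A short sketch of consuming them, where dcgan and loader are hypothetical names for an instance of the class above and a DataLoader, and matplotlib is assumed:

import matplotlib.pyplot as plt

(G_losses, D_losses, Dxs, DGz1s,
 DGz2s, _) = dcgan.train(niter=25, dataset=loader,
                         gpred_step=1.0, dpred_step=0.0)

# The traces hold 0-dim tensors; convert to floats before plotting.
plt.plot([float(v) for v in G_losses], label='Loss_G')
plt.plot([float(v) for v in D_losses], label='Loss_D')
plt.xlabel('iteration')
plt.legend()
plt.savefig('losses.png')
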