Example 1
    bceloss = bceloss.cuda()

lib.init_params(gen)
lib.init_params(disc)

optim_g = torch.optim.Adam(gen.parameters(), lr=1e-3)
optim_d = torch.optim.Adam(disc.parameters(), lr=1e-3)

for epoch in range(epochs):
    gen.train()
    disc.train()

    for (x, _) in TrainDataLoader:
        #x = x.view(-1, 28*28)
        num_data = x.shape[0]
        noise = lib.sample_noise(num_data, num_noise)
        zeros = torch.zeros(num_data, 1)
        ones = torch.ones(num_data, 1)

        if torch.cuda.is_available():
            x = x.cuda()
            noise = noise.cuda()
            zeros = zeros.cuda()
            ones = ones.cuda()

        x_g = gen(noise)

        ### Discriminator train
        optim_d.zero_grad()
        disc.zero_grad()
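
All of these examples call helpers from a local lib module that is not shown. Below is a minimal sketch of what lib.sample_noise and lib.init_params plausibly do, assuming standard-normal latent vectors and a simple normal weight initialization; the actual implementations may differ, and some examples pass an optional device argument, which the sketch accepts as well.

import torch
import torch.nn as nn

def sample_noise(num_samples, num_noise, device=None):
    # Draw a (num_samples, num_noise) batch of standard-normal latent vectors.
    z = torch.randn(num_samples, num_noise)
    return z if device is None else z.to(device)

def init_params(model):
    # Re-initialize weights with N(0, 0.02) and zero the biases.
    for m in model.modules():
        if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)):
            nn.init.normal_(m.weight, mean=0.0, std=0.02)
            if m.bias is not None:
                nn.init.zeros_(m.bias)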
Example 2
lib.init_params(gen)
lib.init_params(disc)

optim_g = torch.optim.Adam(gen.parameters(), lr=1e-3, betas=(0, 0.9))
optim_d = torch.optim.Adam(disc.parameters(), lr=1e-3, betas=(0, 0.9))

for epoch in range(epochs):
    gen.train()
    disc.train()

    for i in range(1):
        for _train_data in train_data:
            x = _train_data.view(-1, 1, 128, 128).to(device)
            num_data = x.shape[0]
            noise = lib.sample_noise(num_data, num_noise).to(device)

            x_g = gen(noise)

            ### Discriminator train
            optim_d.zero_grad()

            ## Regularization Term
            eps = torch.rand(1).item()
            x_hat = (x.detach().clone() * eps + x_g.detach().clone() *
                     (1 - eps)).requires_grad_(True)

            loss_xhat = disc(x_hat)
            fake = torch.ones(loss_xhat.shape[0],
                              1).requires_grad_(False).to(device)
Example 3
    def train(self, data_loader, task, generator, classifier):
        self.cur_task = task
        running_loss = 0.0

        for i, data in enumerate(data_loader):
            x, y = data
            x = x.to(self.device)
            y = y.to(self.device)

            self.G.data.fill_(0.0)
            # Compute gradients w.r.t. past tasks, replayed through the generator
            if self.cur_task > 0:
                for k in range(0, self.cur_task):
                    self.zero_grad()
                    noise = lib.sample_noise(self.mem_size,
                                             self.num_noise).to(self.device)
                    g_image = generator(noise).to(self.device)
                    g_label = classifier(g_image).max(dim=1)[1]
                    g_pred = self.net(g_image)
                    loss = self.criterion(g_pred, g_label)
                    loss.backward()

                    # Copy gradients into matrix G
                    j = 0
                    for params in self.parameters():
                        if params is not None:
                            if j == 0:
                                stpt = 0
                            else:
                                stpt = sum(self.grad_numels[:j])

                            endpt = sum(self.grad_numels[:j + 1])
                            self.G[stpt:endpt,
                                   k].data.copy_(params.grad.data.view(-1))
                            j += 1

            self.zero_grad()
            self.optim.zero_grad()

            # Compute gradient w.r.t. current continuum
            pred = self.net(x)
            #             pred[:, : self.cur_task * 10].data.fill_(-10e10)
            #             pred[:, (self.cur_task+1) * 10:].data.fill_(-10e10)

            #             pred = pred[:, self.cur_task*10: (self.cur_task+1)*10]
            loss = self.criterion(pred, y)
            loss.backward()

            running_loss += loss.item()
            if i % 100 == 99:
                msg = '[%d\t%d] AVG. loss: %.3f\n' % (
                    task + 1, i + 1, running_loss / 100)  #(i*5))
                print(msg)
                #self.log_file.write(msg)
                running_loss = 0.0

            if self.cur_task > 0:
                grad = []
                j = 0
                for params in self.parameters():
                    if params is not None:
                        if j == 0:
                            stpt = 0
                        else:
                            stpt = sum(self.grad_numels[:j])

                        endpt = sum(self.grad_numels[:j + 1])
                        self.G[stpt:endpt,
                               self.cur_task].data.copy_(params.grad.view(-1))
                        j += 1

                # Solve Quadratic Problem
                dotprod = torch.mm(self.G[:, self.cur_task].unsqueeze(0),
                                   self.G[:, :self.cur_task + 1])

                # projection
                if (dotprod < 0).sum() > 0:
                    if i % 100 == 99:
                        print("projection")
                    mem_grad_np = self.G[:, :self.cur_task +
                                         1].cpu().t().double().numpy()
                    curtask_grad_np = self.G[:, self.cur_task].unsqueeze(
                        1).cpu().contiguous().view(-1).double().numpy()

                    t = mem_grad_np.shape[0]
                    P = np.dot(mem_grad_np, mem_grad_np.transpose())
                    P = 0.5 * (P + P.transpose()) + np.eye(t) * self.eps
                    q = np.dot(mem_grad_np, curtask_grad_np) * (-1)
                    G = np.eye(t)
                    h = np.zeros(t) + self.margin
                    v = quadprog.solve_qp(P, q, G, h)[0]
                    x = np.dot(v, mem_grad_np) + curtask_grad_np
                    newgrad = torch.Tensor(x).view(-1, )

                    # Copy gradients into params
                    j = 0
                    for params in self.parameters():
                        if params is not None:
                            if j == 0:
                                stpt = 0
                            else:
                                stpt = sum(self.grad_numels[:j])

                            endpt = sum(self.grad_numels[:j + 1])
                            params.grad.data.copy_(
                                newgrad[stpt:endpt].contiguous().view(
                                    params.grad.data.size()))
                            j += 1

            self.optim.step()
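
Example 3 relies on self.grad_numels and the gradient matrix self.G being prepared in advance, together with the constants self.eps and self.margin used by the quadratic program. The sketch below shows one plausible setup along the lines of the usual GEM bookkeeping, with typical GEM default values; the method name init_gem_buffers and the n_tasks argument are hypothetical, and the real constructor is not shown.

import torch

def init_gem_buffers(self, n_tasks):
    # One entry per parameter tensor: number of elements in its gradient.
    self.grad_numels = [p.data.numel() for p in self.parameters()]
    # One column of G per task; each column stores a flattened gradient.
    self.G = torch.zeros(sum(self.grad_numels), n_tasks).to(self.device)
    self.eps = 1e-3    # jitter added to the quadratic term P of the QP
    self.margin = 0.5  # slack added to the inequality constraints h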
Example 4
def train(**kwargs):
    TrainDataLoaders = kwargs['TrainDataLoaders']
    TestDataLoaders = kwargs['TestDataLoaders']
    batch_size = kwargs['batch_size']
    num_noise = kwargs['num_noise']
    cur_task = kwargs['cur_task']
    gen = kwargs['gen']
    disc = kwargs['disc']
    solver = kwargs['solver']
    pre_gen = kwargs['pre_gen']
    pre_solver = kwargs['pre_solver']
    ratio = kwargs['ratio']
    epochs = kwargs['epochs']

    assert 0 <= ratio <= 1

    ld = 10  # gradient-penalty coefficient (lambda)
    celoss = torch.nn.CrossEntropyLoss()
    optim_g = torch.optim.Adam(gen.parameters(), lr=0.001, betas=(0, 0.9))
    optim_d = torch.optim.Adam(disc.parameters(), lr=0.001, betas=(0, 0.9))
    optim_s = torch.optim.Adam(solver.parameters(), lr=0.001)
    TrainDataLoader = TrainDataLoaders[cur_task]

    # Generator Training
    for epoch in range(epochs):
        gen.train()
        disc.train()
        for i, (x, y) in enumerate(TrainDataLoader):
            x = x.view(-1, 28 * 28)
            num_data = x.shape[0]
            noise = lib.sample_noise(num_data, num_noise)

            if torch.cuda.is_available():
                x = x.cuda()
                noise = noise.cuda()

            if pre_gen is not None:
                with torch.no_grad():
                    # append generated image & label from previous scholar
                    x_g = pre_gen(lib.sample_noise(batch_size, num_noise))
                    '''
                    gimg_min = gen_image.min(dim=1, keepdim=True)[0].min(dim=2, keepdim=True)[0]
                    gen_image = ((gen_image - gimg_min) * 256)
                    '''
                    x = torch.cat((x, x_g))
                    perm = torch.randperm(x.shape[0])[:num_data]
                    x = x[perm]

            #x = x.unsqueeze(1)

            ### Discriminator train
            optim_d.zero_grad()
            x_g = gen(noise)

            ## Regularization term
            eps = torch.rand(1).item()
            x_hat = (x.detach().clone() * eps + x_g.detach().clone() *
                     (1 - eps)).requires_grad_(True)

            loss_xhat = disc(x_hat)
            fake = torch.ones(loss_xhat.shape[0], 1).requires_grad_(False)
            if torch.cuda.is_available():
                fake = fake.cuda()

            gradients = torch.autograd.grad(outputs=loss_xhat,
                                            inputs=x_hat,
                                            grad_outputs=fake,
                                            create_graph=True,
                                            retain_graph=True,
                                            only_inputs=True)[0]
            gradients = gradients.view(gradients.shape[0], -1)
            gp = ((gradients.norm(2, dim=1) - 1)**2).mean() * ld

            p_real = disc(x)
            p_fake = disc(x_g.detach())

            loss_d = torch.mean(p_fake) - torch.mean(p_real) + gp
            loss_d.backward()
            optim_d.step()

            #if i % 5 == 4:
            ### Generator Training
            optim_g.zero_grad()
            p_fake = disc(x_g)

            loss_g = -torch.mean(p_fake)
            loss_g.backward()
            optim_g.step()

        print("[Epoch %d/%d] [D loss: %f] [G loss: %f]" %
              (epoch + 1, epochs, loss_d.item(), loss_g.item()))
        if epoch % 10 == 9:
            gen_image = gen(lib.sample_noise(24,
                                             num_noise)).view(24, 1, 28, 28)
            lib.imshow_grid(gen_image)

    # train solver
    for image, label in TrainDataLoader:
        if torch.cuda.is_available():
            image = image.cuda()
            label = label.cuda()

        optim_s.zero_grad()
        output = solver(image)
        loss = celoss(output, label) * ratio

        if pre_solver is not None:
            noise = lib.sample_noise(batch_size, num_noise)
            g_image = pre_gen(noise)
            g_label = pre_solver(g_image).max(dim=1)[1]
            g_output = solver(g_image)
            loss += celoss(g_output, g_label) * (1 - ratio)

        loss.backward()
        optim_s.step()
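
The gradient-penalty term computed inline in the discriminator step above can also be written as a standalone helper. The following sketch is equivalent to the inline code in Examples 2, 4 and 7; the function name gradient_penalty is ours, not part of the original code.

import torch

def gradient_penalty(disc, x_real, x_fake, ld=10):
    # Interpolate real and generated samples with one random weight per batch,
    # as the examples above do (per-sample weights are also common for WGAN-GP).
    eps = torch.rand(1).item()
    x_hat = (x_real.detach() * eps + x_fake.detach() * (1 - eps)).requires_grad_(True)

    d_hat = disc(x_hat)
    gradients = torch.autograd.grad(outputs=d_hat,
                                    inputs=x_hat,
                                    grad_outputs=torch.ones_like(d_hat),
                                    create_graph=True,
                                    retain_graph=True,
                                    only_inputs=True)[0]
    gradients = gradients.view(gradients.shape[0], -1)
    # Penalize deviation of the per-sample gradient norm from 1.
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean() * ld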
Example 5
    lib.init_params(disc)

    TrainDataLoader = TrainDataLoaders[t]

    optim_g = torch.optim.Adam(gen.parameters(), lr=0.001, betas=(0, 0.9))
    optim_d = torch.optim.Adam(disc.parameters(), lr=0.001, betas=(0, 0.9))

    # Generator Training
    for epoch in range(gen_epochs):
        gen.train()
        disc.train()

        for i, (x, _) in enumerate(TrainDataLoader):
            x = x.to(device)
            num_data = x.shape[0]
            noise = lib.sample_noise(num_data, num_noise, device).to(device)

            if pre_gen is not None:
                with torch.no_grad():
                    # append generated image & label from previous scholar
                    datapart = int(num_data * ratio)
                    perm = torch.randperm(num_data)[:datapart]
                    x = x[perm]

                    x_g = pre_gen(lib.sample_noise(num_data, num_noise,
                                                   device))
                    perm = torch.randperm(num_data)[:num_data - datapart]
                    x_g = x_g[perm]

                    x = torch.cat((x, x_g))
Example 6
def train(**kwargs):
    TrainDataLoaders = kwargs['TrainDataLoaders']
    TestDataLoaders = kwargs['TestDataLoaders']
    batch_size = kwargs['batch_size']
    num_noise = kwargs['num_noise']
    cur_task = kwargs['cur_task']
    gen = kwargs['gen']
    disc = kwargs['disc']
    solver = kwargs['solver']
    pre_gen = kwargs['pre_gen']
    pre_solver = kwargs['pre_solver']
    ratio = kwargs['ratio']
    epochs = kwargs['epochs']
    
    
    assert 0 <= ratio <= 1
    bceloss = torch.nn.BCELoss()
    celoss = torch.nn.CrossEntropyLoss()

    gen_optim = torch.optim.Adam(gen.parameters(), lr=0.001)
    disc_optim = torch.optim.Adam(disc.parameters(), lr=0.001)
    solver_optim = torch.optim.Adam(solver.parameters(), lr=0.001)
    train_dataloader = TrainDataLoaders[cur_task]
    
    # GAN Training
    for epoch in range(epochs):
        for image, label in train_dataloader:
            num_images = image.shape[0]
            
            if torch.cuda.is_available():
                image = image.cuda()
                #label = label.cuda()
            
            if pre_gen is not None:
                with torch.no_grad():
                    # append generated image & label from previous scholar
                    gen_image = pre_gen(lib.sample_noise(batch_size, num_noise)).squeeze()
                    gimg_min = gen_image.min(dim=1, keepdim=True)[0].min(dim=2, keepdim=True)[0]
                    gen_image = ((gen_image - gimg_min) * 256)
                    gen_label = pre_solver(gen_image).max(dim=1)[1]
    
                    image = torch.cat((image, gen_image))
                    label = torch.cat((label, gen_label))
                    
                    perm = torch.randperm(image.shape[0])[:num_images]
                    image = image[perm]
                

            image = image.unsqueeze(1)
            
            ### Discriminator Training
            disc_optim.zero_grad()
            p_real = disc(image)
            p_fake = disc(gen(lib.sample_noise(image.shape[0], num_noise)))

            ones = torch.ones_like(p_real)
            zeros = torch.zeros_like(p_real)
            if torch.cuda.is_available():
                ones = ones.cuda()
                zeros = zeros.cuda()

            loss_d = bceloss(p_real, ones) + bceloss(p_fake, zeros)

            loss_d.backward()
            disc_optim.step()

            # Clipping weights
            
            for params in disc.parameters():
                params.data.clamp_(-0.01, 0.01)
            
            ### Generator Training
            gen_optim.zero_grad()
            p_fake = disc(gen(lib.sample_noise(batch_size, num_noise)))

            ones = torch.ones_like(p_fake)
            if torch.cuda.is_available():
                ones = ones.cuda()

            loss_g = bceloss(p_fake, ones)
            loss_g.backward()

            gen_optim.step()

        if epoch % 50 == 49:
            p_real, p_fake = lib.gan_evaluate(batch_size = batch_size,
                                              num_noise = num_noise,
                                              cur_task = cur_task, 
                                              gen = gen, 
                                              disc = disc, 
                                              TestDataLoaders = TestDataLoaders)
            gen_image = gen(lib.sample_noise(batch_size, num_noise))
            print("(Epoch {}/{}) p_real: {} | p_fake: {}\n".format(epoch, epochs, p_real, p_fake))
            lib.imshow_grid(gen_image)
    
    # train solver
    for image, label in train_dataloader:
        if torch.cuda.is_available():
            image = image.cuda()
            label = label.cuda()

        solver_optim.zero_grad()
        output = solver(image)
        loss = celoss(output, label) * ratio

        if pre_solver is not None:
            noise = lib.sample_noise(batch_size, num_noise)
            g_image = pre_gen(noise)
            g_label = pre_solver(g_image).max(dim=1)[1]
            g_output = solver(g_image)
            loss += celoss(g_output, g_label) * (1 - ratio)

        loss.backward()
        solver_optim.step()
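
Examples 4 and 6 also call lib.imshow_grid to display a batch of generated images. A plausible minimal version using torchvision and matplotlib is sketched below; this is an assumption, as the real helper is not shown and may differ.

import matplotlib.pyplot as plt
import torchvision

def imshow_grid(images, nrow=8):
    # Arrange the batch into a single image grid and display it.
    grid = torchvision.utils.make_grid(images.detach().cpu(), nrow=nrow, normalize=True)
    plt.figure(figsize=(8, 8))
    plt.imshow(grid.permute(1, 2, 0).numpy())
    plt.axis('off')
    plt.show()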
Example 7
    ### optimizer
    optim_g = torch.optim.Adam(gen.parameters(), lr=1e-3, betas=(0, 0.9))
    optim_d = torch.optim.Adam(disc.parameters(), lr=1e-3, betas=(0, 0.9))
    optim_s = torch.optim.Adam(solver.parameters(), lr=1e-3)

    for epoch in range(epochs):
        gen.train()
        disc.train()

        ### WGAN_GP Learning
        for i in range(10):
            for _train_data in train_data:
                x = _train_data.view(-1, 1, 128, 128).to(device)
                num_data = x.shape[0]
                noise = lib.sample_noise(num_data, num_noise).to(device)

                x_g = gen(noise)

                ### Discriminator train
                optim_d.zero_grad()

                ## Regularization Term
                eps = torch.rand(1).item()
                x_hat = (x.detach().clone() * eps + x_g.detach().clone() * (1 - eps)).requires_grad_(True)

                loss_xhat = disc(x_hat)
                fake = torch.ones(loss_xhat.shape[0], 1).requires_grad_(False).to(device)

                gradients = torch.autograd.grad(
                    outputs = loss_xhat,