Example #1
    def __init__(self, opt, nclasses, ndomains, mean, std, source_trainloader, source_valloader, targetloader):

        self.source_trainloader = source_trainloader
        self.source_valloader = source_valloader
        self.targetloader = targetloader
        self.opt = opt
        self.mean = mean
        self.std = std
        self.best_val = 0
        self.best_test = 0
        self.nclasses = nclasses
        self.ndomains = ndomains
        
        # Defining networks and optimizers
        self.netF1 = models._netF(opt)
        self.netF2 = models._netF(opt)
        self.netC1 = models._netC(opt, nclasses)
        self.netC2 = models._netC(opt, ndomains)
        self.netC3 = models._netC(opt, ndomains)
        self.netG = models._netG(opt, (opt.ndf*2)*2)
        self.netD = models._netD(opt, nclasses, ndomains)

        # Weight initialization
        self.netF1.apply(utils.weights_init)
        self.netF2.apply(utils.weights_init)
        self.netC1.apply(utils.weights_init)
        self.netC2.apply(utils.weights_init)
        self.netC3.apply(utils.weights_init)
        self.netG.apply(utils.weights_init)
        self.netD.apply(utils.weights_init)

        # Defining loss criterions
        self.criterion_c = nn.CrossEntropyLoss()
        self.criterion_s = nn.BCELoss()

        if opt.gpu >= 0:
            self.netF1.cuda()
            self.netF2.cuda()
            self.netC1.cuda()
            self.netC2.cuda()
            self.netC3.cuda()
            self.netG.cuda()
            self.netD.cuda()
            self.criterion_c.cuda()
            self.criterion_s.cuda()

        # Defining optimizers
        self.optimizerF1 = optim.Adam(self.netF1.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerF2 = optim.Adam(self.netF2.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerC1 = optim.Adam(self.netC1.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerC2 = optim.Adam(self.netC2.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerC3 = optim.Adam(self.netC3.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerG = optim.Adam(self.netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerD = optim.Adam(self.netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

        # Other variables
        self.real_label_val = 1
        self.fake_label_val = 0
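
The constructor above expects an `opt` namespace carrying at least `lr`, `beta1`, `gpu`, and `ndf` (plus whatever the `models._netF`/`_netC`/`_netG`/`_netD` constructors read from it). A minimal sketch of how such an options object might be built with argparse follows; the defaults are illustrative assumptions, not values from the original repository:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--lr', type=float, default=0.0002)   # Adam learning rate
parser.add_argument('--beta1', type=float, default=0.5)   # Adam beta1
parser.add_argument('--gpu', type=int, default=0)         # a negative value keeps everything on the CPU
parser.add_argument('--ndf', type=int, default=64)        # feature width used to size netG
opt = parser.parse_args([])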
Example #2
    def __init__(self, opt, nclasses, mean, std, source_trainloader,
                 source_valloader, target_trainloader, target_valloader):

        self.source_trainloader = source_trainloader
        self.source_valloader = source_valloader
        self.target_trainloader = target_trainloader
        self.target_valloader = target_valloader
        self.opt = opt
        self.mean = mean
        self.std = std
        self.best_val = 0

        # Defining networks and optimizers
        self.nclasses = nclasses
        self.netG = models._netG(opt, nclasses)
        self.netD = models._netD(opt, nclasses)
        self.netF = models._netF(opt)
        self.netC = models._netC(opt, nclasses)

        # Weight initialization
        self.netG.apply(utils.weights_init)
        self.netD.apply(utils.weights_init)
        self.netF.apply(utils.weights_init)
        self.netC.apply(utils.weights_init)

        # Defining loss criterions
        self.criterion_c = nn.CrossEntropyLoss()
        self.criterion_s = nn.BCELoss()

        if opt.gpu >= 0:
            self.netD.cuda()
            self.netG.cuda()
            self.netF.cuda()
            self.netC.cuda()
            self.criterion_c.cuda()
            self.criterion_s.cuda()

        # Defining optimizers
        self.optimizerD = optim.Adam(self.netD.parameters(),
                                     lr=opt.lr,
                                     betas=(0.8, 0.999))
        self.optimizerG = optim.Adam(self.netG.parameters(),
                                     lr=opt.lr,
                                     betas=(0.8, 0.999))
        self.optimizerF = optim.Adam(self.netF.parameters(),
                                     lr=opt.lr,
                                     betas=(0.8, 0.999))
        self.optimizerC = optim.Adam(self.netC.parameters(),
                                     lr=opt.lr,
                                     betas=(0.8, 0.999))

        # Other variables
        self.real_label_val = 1
        self.fake_label_val = 0
Example #3
    def __init__(self, mean, std, source_trainloader, source_valloader,
                 targetloader):

        self.source_trainloader = source_trainloader
        self.source_valloader = source_valloader
        self.targetloader = targetloader
        self.mean = mean
        self.std = std
        self.best_val = 0
        self.cuda = True if torch.cuda.is_available() else False

        # Defining networks and optimizers
        self.netG = models._netG()
        self.netD = models._netD()
        self.netF = models._netF()
        self.netC = models._netC()

        # Weight initialization
        self.netG.apply(utils.weights_init)
        self.netD.apply(utils.weights_init)
        self.netF.apply(utils.weights_init)
        self.netC.apply(utils.weights_init)

        # Defining loss criterions
        self.criterion_c = nn.CrossEntropyLoss()
        self.criterion_s = nn.BCELoss()

        if self.cuda:
            self.netD.cuda()
            self.netG.cuda()
            self.netF.cuda()
            self.netC.cuda()
            self.criterion_c.cuda()
            self.criterion_s.cuda()

        # Defining optimizers
        self.optimizerD = optim.Adam(self.netD.parameters(),
                                     lr=consts.lr,
                                     betas=(consts.beta1, 0.999))
        self.optimizerG = optim.Adam(self.netG.parameters(),
                                     lr=consts.lr,
                                     betas=(consts.beta1, 0.999))
        self.optimizerF = optim.Adam(self.netF.parameters(),
                                     lr=consts.lr,
                                     betas=(consts.beta1, 0.999))
        self.optimizerC = optim.Adam(self.netC.parameters(),
                                     lr=consts.lr,
                                     betas=(consts.beta1, 0.999))

        # Other variables
        self.real_label_val = 1
        self.fake_label_val = 0
Example #4
    def __init__(self, opt, nclasses, mean, std, source_trainloader, source_valloader, targetloader):

        self.source_trainloader = source_trainloader
        self.source_valloader = source_valloader
        self.targetloader = targetloader
        self.opt = opt
        self.mean = mean
        self.std = std
        self.best_val = 0
        self.best_test = 0
        
        # Defining networks and optimizers
        self.nclasses = nclasses
        self.netG = models._netG(opt, self.nclasses+1+opt.ndf*2)
        self.netD = models._netD(opt, nclasses, 1)
        self.netF = models._netF(opt)
        self.netC = models._netC(opt, nclasses)

        # Weight initialization
        self.netG.apply(utils.weights_init)
        self.netD.apply(utils.weights_init)
        self.netF.apply(utils.weights_init)
        self.netC.apply(utils.weights_init)

        # Defining loss criterions
        self.criterion_c = nn.CrossEntropyLoss()
        self.criterion_s = nn.BCELoss()

        if opt.gpu >= 0:
            self.netD.cuda()
            self.netG.cuda()
            self.netF.cuda()
            self.netC.cuda()
            self.criterion_c.cuda()
            self.criterion_s.cuda()

        # Defining optimizers
        self.optimizerD = optim.Adam(self.netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerG = optim.Adam(self.netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerF = optim.Adam(self.netF.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerC = optim.Adam(self.netC.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

        # Other variables
        self.real_label_val = 1
        self.fake_label_val = 0
Example #5
 def __init__(self, config, exp_idx, seed=1000):
     self.seed = seed
     self.exp_idx = exp_idx
     self.iqa_hash = collections.defaultdict(dict)
     self.config = read_json(config)
     self.netG = _netG(self.config["G_fil_num"]).cuda()
     self.netD = _netD(self.config["D_fil_num"]).cuda()
     # self.cnn = self.initial_CNN('./cnn_config.json', exp_idx=0, epoch=100)
     self.initial_CNN('./cnn_config.json', exp_idx=0)
     if self.config["D_pth"]:
         self.netD.load_state_dict(torch.load(self.config["D_pth"]))
     if self.config["G_pth"]:
          self.netG.load_state_dict(torch.load(self.config["G_pth"]))
     self.checkpoint_dir = self.config["checkpoint_dir"]
     if not os.path.exists(self.checkpoint_dir):
         os.mkdir(self.checkpoint_dir)
     self.prepare_dataloader()
     self.log_name = self.config["log_name"]
     self.iqa_name = self.config["iqa_name"]
     self.eng = matlab.engine.start_matlab()
     self.save_every_epoch = self.config["save_every_epoch"]
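
`read_json` is not shown in this excerpt; a minimal helper consistent with how it is used above (loading a JSON config file into a dict) might look like this sketch:

import json

def read_json(path):
    # Load a JSON configuration file into a plain dict.
    with open(path, 'r') as f:
        return json.load(f)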
Example #6
def train():
    param = _param()
    dataset = DATA_LOADER(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result = Result()
    result_gzsl = Result()

    netG = _netG_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    start_step = 0

    nets = [netG, netD]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    for it in range(start_step, 3000 + 1):
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_sem[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            # GAN's D loss
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = opt.Adv_LAMBDA * (-D_loss_real + C_loss_real)
            DC_loss.backward()

            # GAN's D loss on generated (fake) features
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)

            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = opt.Adv_LAMBDA * (D_loss_fake + C_loss_fake)
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = opt.Adv_LAMBDA * calc_gradient_penalty(
                netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)
        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_sem[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2
            GC_loss = opt.Adv_LAMBDA * (-G_loss + C_loss)

            # Centroid loss
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) -
                            tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])

            # log_text = 'Iter-{}; Was_D: {:.3f}; Euc_ls: {:.3f}; reg_ls: {:.3f}; G_loss: {:.3f}; D_loss_real: {:.3f};' \
            #            ' D_loss_fake: {:.3f}; rl: {:.2f}%; fk: {:.2f}%'\
            #             .format(it, Wasserstein_D.data[0],  Euclidean_loss.data[0], reg_loss.data[0],
            #                     G_loss.data[0], D_loss_real.data[0], D_loss_fake.data[0], acc_real * 100, acc_fake * 100)
            log_text = 'Iter-{} *********************'.format(it)
            print(log_text)
            # with open(log_dir, 'a') as f:
            #     f.write(log_text+'\n')

        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()
            eval_fakefeat_test(it, netG, dataset, param, result)
            # eval_fakefeat_test_Hit(it, netG, dataset, param)
            eval_fakefeat_test_gzsl(it, netG, dataset, param, result_gzsl)

            netG.train()
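
`calc_gradient_penalty` and `reset_grad` are used throughout Examples #6-#10 but not defined in these excerpts. The sketch below shows a standard WGAN-GP gradient penalty and a gradient-reset helper consistent with how they are called here; it is written against the current PyTorch autograd API (no `Variable`), and the exact signatures in the original code may differ:

import torch
import torch.autograd as autograd

def calc_gradient_penalty(netD, real_data, fake_data, gp_lambda=10.0):
    # Random interpolation between real and generated samples (broadcast over feature dims).
    alpha = torch.rand([real_data.size(0)] + [1] * (real_data.dim() - 1),
                       device=real_data.device)
    interpolates = (alpha * real_data + (1.0 - alpha) * fake_data).detach().requires_grad_(True)

    out = netD(interpolates)
    disc_out = out[0] if isinstance(out, tuple) else out  # some critics also return an aux-classifier head

    gradients = autograd.grad(outputs=disc_out, inputs=interpolates,
                              grad_outputs=torch.ones_like(disc_out),
                              create_graph=True, retain_graph=True)[0]
    gradients = gradients.view(gradients.size(0), -1)
    # Penalize deviation of the gradient norm from 1 (WGAN-GP).
    return gp_lambda * ((gradients.norm(2, dim=1) - 1.0) ** 2).mean()

def reset_grad(nets):
    # Clear accumulated gradients on every network in the list.
    for net in nets:
        net.zero_grad()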
Example #7
def train(opt):
    param = _param()
    dataset = LoadDataset(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.labels_train,
                               dataset.pfc_feat_data_train, opt)

    # initialize model
    netGs = []
    netDs = []
    parts = 7 if opt.dataset == "CUB2011" else 6
    for part in range(parts):
        netGs.append(_netG(dataset.text_dim, 512).cuda().apply(weights_init))
        netDs.append(
            _netD(dataset.train_cls_num, 512).cuda().apply(weights_init))

    start_step = 0

    part_cls_centrild = torch.from_numpy(
        dataset.part_cls_centrild.astype('float32')).cuda()

    # initialize optimizers
    optimizerGs = []
    optimizerDs = []
    for netG in netGs:
        optimizerGs.append(
            optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9)))
    for netD in netDs:
        optimizerDs.append(
            optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9)))

    for it in range(start_step, 3000 + 1):
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array(
                [dataset.train_text_feature[i, :] for i in labels])
            text_feat = torch.from_numpy(text_feat.astype('float32')).cuda()
            X = torch.from_numpy(feat_data).cuda()
            y_true = torch.from_numpy(labels.astype('int')).cuda()

            for part in range(parts):
                z = torch.randn(opt.batchsize, param.z_dim).cuda()
                D_real, C_real = netDs[part](X[:, part * 512:(part + 1) * 512])
                D_loss_real = torch.mean(D_real)
                C_loss_real = F.cross_entropy(C_real, y_true)
                DC_loss = -D_loss_real + C_loss_real
                DC_loss.backward()

                G_sample = netGs[part](z, text_feat)
                D_fake, C_fake = netDs[part](G_sample)
                D_loss_fake = torch.mean(D_fake)
                C_loss_fake = F.cross_entropy(C_fake, y_true)
                DC_loss = D_loss_fake + C_loss_fake
                DC_loss.backward()

                grad_penalty = calc_gradient_penalty(
                    opt.batchsize, netDs[part],
                    X.data[:, part * 512:(part + 1) * 512], G_sample.data,
                    opt.GP_LAMBDA)
                grad_penalty.backward()

                Wasserstein_D = D_loss_real - D_loss_fake
                # writer.add_scalar("Wasserstein_D"+str(part), Wasserstein_D.item(), it)

                optimizerDs[part].step()
                netGs[part].zero_grad()
                netDs[part].zero_grad()
        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array(
                [dataset.train_text_feature[i, :] for i in labels])
            text_feat = torch.from_numpy(text_feat.astype('float32')).cuda()

            X = torch.from_numpy(feat_data).cuda()
            y_true = torch.from_numpy(labels.astype('int')).cuda()

            for part in range(parts):
                z = torch.randn(opt.batchsize, param.z_dim).cuda()
                G_sample = netGs[part](z, text_feat)
                # G_sample_all[:, part*512:(part+1)*512] = G_sample
                D_fake, C_fake = netDs[part](G_sample)
                _, C_real = netDs[part](X[:, part * 512:(part + 1) * 512])

                G_loss = torch.mean(D_fake)
                C_loss = (F.cross_entropy(C_real, y_true) +
                          F.cross_entropy(C_fake, y_true)) / 2
                GC_loss = -G_loss + C_loss
                # writer.add_scalar("GC_loss"+str(part), GC_loss.item(), it)

                Euclidean_loss = torch.tensor([0.0]).cuda()
                if opt.REG_W_LAMBDA != 0:
                    for i in range(dataset.train_cls_num):
                        sample_idx = (y_true == i).data.nonzero().squeeze()
                        if sample_idx.numel() == 0:
                            Euclidean_loss += 0.0
                        else:
                            G_sample_cls = G_sample[sample_idx, :]
                            Euclidean_loss += (G_sample_cls.mean(dim=0) -
                                               part_cls_centrild[i][part]
                                               ).pow(2).sum().sqrt()
                    Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

                # ||W||_2 regularization
                reg_loss = torch.Tensor([0.0]).cuda()
                if opt.REG_W_LAMBDA != 0:

                    for name, p in netGs[part].named_parameters():
                        if 'weight' in name:
                            reg_loss += p.pow(2).sum()
                    reg_loss.mul_(opt.REG_W_LAMBDA)

                # writer.add_scalar("reg_loss"+str(part), reg_loss.item(), it)

                # ||W_z||21 regularization, make W_z sparse
                reg_Wz_loss = torch.Tensor([0.0]).cuda()
                if opt.REG_Wz_LAMBDA != 0:
                    Wz = netGs[part].rdc_text.weight
                    reg_Wz_loss = reg_Wz_loss + Wz.pow(2).sum(
                        dim=0).sqrt().sum().mul(opt.REG_Wz_LAMBDA)

                # writer.add_scalar("reg_Wz_loss"+str(part), reg_Wz_loss.item(), it)

                all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss
                all_loss.backward()
                optimizerGs[part].step()

        if it % opt.evl_interval == 0 and it > 500:
            print(it)
            for part in range(parts):
                netGs[part].eval()
            train_classifier(opt, param, dataset, netGs)
            for part in range(parts):
                netGs[part].train()
Example #8
def train(im_data, gen_model, method, name_file, niter, n_critic, lr,
          input_noise):
    nz = 100
    img_size = 64
    batch_size = 64
    beta1 = 0.5

    hyperparameters = {}
    hyperparameters['nz'] = nz
    hyperparameters['n_critic'] = n_critic
    hyperparameters['img_size'] = img_size
    hyperparameters['batch_size'] = batch_size
    hyperparameters['lr'] = lr
    hyperparameters['beta1'] = beta1

    dataloader = torch.utils.data.DataLoader(im_data, batch_size, shuffle=True)

    input = torch.FloatTensor(batch_size, 3, img_size, img_size)
    noise = torch.FloatTensor(batch_size, nz, 1, 1)
    fixed_noise = Variable(
        torch.FloatTensor(batch_size, nz, 1, 1).normal_(0, 1))

    label = torch.FloatTensor(batch_size)

    # real_label = 1
    # fake_label = 0

    if gen_model == 'nearest':
        netG = _netG_nearest()
    elif gen_model == 'bilinear':
        netG = _netG_bilinear()
    elif gen_model == 'transposed_conv2d':
        netG = _netG()

    netG.apply(weights_init)

    if method == 'GAN':
        netD = _netD(method)
    elif method == 'WGAN':
        netD = _netD(method)

    netD.apply(weights_init)
    criterion = nn.BCEWithLogitsLoss()

    if method == 'GAN':
        optimizerD = optim.Adam(netD.parameters(), lr, betas=(beta1, 0.9))
        optimizerG = optim.Adam(netG.parameters(), lr, betas=(beta1, 0.9))

    elif method == 'WGAN':
        optimizerD = optim.RMSprop(netD.parameters(), lr=lr)
        optimizerG = optim.RMSprop(netG.parameters(), lr=lr)

    if torch.cuda.is_available():
        netD.cuda()
        netG.cuda()
        criterion.cuda()
        input, label = input.cuda(), label.cuda()
        noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

    epochl = []

    wdl = []
    errDml = []
    errGml = []

    errDsl = []
    errGsl = []

    errorGl = []
    errorDl = []

    dxl = []
    dgz1l = []
    dgz2l = []

    grad_netDl = []
    grad_netGl = []

    for epoch in range(niter):
        errDm = []
        errGm = []
        wdm = []
        for i, data in enumerate(dataloader):

            for j in range(n_critic):
                #############################################################
                # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
                #############################################################

                # train with real
                netD.zero_grad()

                # Optionally add Gaussian noise to the input data and renormalize.
                if input_noise == 'yes':
                    sigma = 0.1
                    gaussian_noise = data.new(data.size()).normal_(0, sigma)
                    normalize = torch.max(torch.abs(data + gaussian_noise))
                    real_cpu = (data + gaussian_noise) / normalize

                else:
                    real_cpu = data

                batch_size = real_cpu.size(0)
                if torch.cuda.is_available():
                    real_cpu = real_cpu.cuda()

                # train with real
                input.resize_as_(real_cpu).copy_(real_cpu)
                real_label = gan_label(1, 'D')
                label.resize_(batch_size).fill_(real_label)

                inputv = Variable(input)
                labelv = Variable(label)

                output = netD(inputv)
                if method == 'GAN':
                    errD_real = torch.log(
                        output)  #criterion(output, labelv) # labelv = real
                elif method == 'WGAN':
                    errD_real = torch.mean(output)

                D_x = output.data.mean()

                # train with fake
                noise.resize_(batch_size, nz, 1, 1).normal_(0, 1)
                noisev = Variable(noise)
                fake = netG(noisev)
                fake_label = gan_label(0, 'D')
                labelv = Variable(label.fill_(fake_label))  # 0
                output = netD(fake.detach())
                if method == 'GAN':
                    errD_fake = torch.log(
                        1 - output)  #criterion(output, labelv) # labelv = fake
                elif method == 'WGAN':
                    errD_fake = torch.mean(output)

                D_G_z1 = output.data.mean()

                grad_penalty = calc_gradient_penalty(netD, inputv, fake)

                if method == 'GAN':
                    errD = -torch.mean(errD_real + errD_fake) + grad_penalty

                if method == 'WGAN':
                    errD = -(torch.mean(errD_real) -
                             torch.mean(errD_fake)) + grad_penalty

                errD.backward()
                optimizerD.step()

                if method == 'GAN':
                    pass
                if method == 'WGAN':
                    for p in netD.parameters():
                        p.data.clamp_(-0.05, 0.05)

                wd = torch.mean(errD_real - errD_fake)
            wdm.append(wd.data[0])
            errDm.append(errD.data[0])

            #############################################
            # (2) Update G network: maximize log(D(G(z)))
            #############################################

            netG.zero_grad()
            real_label = gan_label(1, 'G')
            labelv = Variable(label.fill_(
                real_label))  # fake labels are treated as real for the generator cost
            output = netD(fake)  # re-score the fake batch with the freshly updated discriminator

            if method == 'GAN':
                errG = -torch.mean(torch.log(
                    output))  #criterion(output, labelv) # labelv = real
            elif method == 'WGAN':
                errG = -torch.mean(output)

            errG.backward()
            errGm.append(errG.data[0])

            D_G_z2 = output.data.mean()
            optimizerG.step()

            grad_netD = grad_norm(netD)
            grad_netG = grad_norm(netG)

            # torch.nn.utils.clip_grad_norm(netG.parameters(), 0.2, norm_type=2)

            print(
                '[{}/{}][{}/{}] Loss_D: {:8f} Loss_G: {:4f} D(x): {:4f} D(G(z)): {:4f} / {:4f} Grad_D: {:2f} Grad_G: {:2f}'
                .format(epoch + 1, niter, i + 1, len(dataloader), errD.data[0],
                        errG.data[0], D_x, D_G_z1, D_G_z2, grad_netD,
                        grad_netG))

            errorDl.append(errD.data[0])
            errorGl.append(errG.data[0])
            dxl.append(D_x)
            dgz1l.append(D_G_z1)
            dgz2l.append(D_G_z2)
            epochl.append(epoch)
            grad_netDl.append(grad_netD)
            grad_netGl.append(grad_netG)

        # Save samples from the fixed noise at the end of each epoch.
        fake = netG(fixed_noise)
        vutils.save_image(
            fake.data,
            'savedata/figures/{}_{}_fake_samples_epoch_{}_i_{}.png'.format(
                gen_model, method, epoch, i),
            normalize=True)
        # vutils.save_image(real_cpu,'figures/outputs/{}_real_samples_epoch_{}_i_{}.png'.format(gen_model, epoch, i), normalize=True)

        wdl.append(np.mean(np.array(wdm)))
        errDml.append(np.mean(np.array(errDm)))
        errGml.append(np.mean(np.array(errGm)))
        errDsl.append(np.std(np.array(errDm)))
        errGsl.append(np.std(np.array(errGm)))

        torch.save(
            netG.state_dict(),
            'savedata/models/{}_{}_netG_epoch_{}.pth'.format(
                name_file, method, epoch))
        torch.save(
            netD.state_dict(),
            'savedata/models/{}_{}_netD_epoch_{}.pth'.format(
                name_file, method, epoch))

        if epoch % 1 == 0:
            dd = {}
            if input_noise == 'yes':
                dd['sigma'] = sigma
            else:
                pass
            dd['wd'] = wdl
            dd['gen_model'] = gen_model
            dd['epoch'] = epochl
            dd['errDm'] = errDml
            dd['errGm'] = errGml
            dd['errDs'] = errDsl
            dd['errGs'] = errGsl
            dd['error_d'] = errorDl
            dd['error_g'] = errorGl
            dd['dx'] = dxl
            dd['dgz1'] = dgz1l
            dd['dgz2'] = dgz2l
            dd['grad_netD'] = grad_netDl
            dd['grad_netG'] = grad_netGl

            filename = 'savedata/data/{}_epoch_{}.pkl'.format(name_file, epoch)
            with open(filename, 'wb') as f:
                pickle.dump([hyperparameters, dd], f)

    dd = {}
    if input_noise == 'yes':
        dd['sigma'] = sigma
    else:
        pass
    dd['wd'] = wdl
    dd['gen_model'] = gen_model
    dd['epoch'] = epochl
    dd['errDm'] = errDml
    dd['errGm'] = errGml
    dd['errDs'] = errDsl
    dd['errGs'] = errGsl
    dd['error_d'] = errorDl
    dd['error_g'] = errorGl
    dd['dx'] = dxl
    dd['dgz1'] = dgz1l
    dd['dgz2'] = dgz2l
    dd['grad_netD'] = grad_netDl
    dd['grad_netG'] = grad_netGl
    filename = 'savedata/data/{}_epoch_{}.pkl'.format(name_file, epoch)
    with open(filename, 'wb') as f:
        pickle.dump([hyperparameters, dd], f)
    print()
    print(dd['wd'])
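
`gan_label` and `grad_norm` are helpers this excerpt does not define. Sketches consistent with how they are called above are given below; `gan_label` assumes no label smoothing or flipping is applied, and both signatures are inferred from the call sites rather than taken from the original source:

def gan_label(value, which):
    # Target label for real (1) or fake (0) samples; `which` ('D' or 'G') is kept
    # so per-network label smoothing or flipping could be added later.
    return float(value)

def grad_norm(net):
    # Total L2 norm of all parameter gradients of a network.
    total = 0.0
    for p in net.parameters():
        if p.grad is not None:
            total += p.grad.data.norm(2).item() ** 2
    return total ** 0.5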
Example #9
def train():
    param = _param()
    dataset = LoadDataset(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.labels_train,
                               dataset.pfc_feat_data_train, opt)

    # initialize model
    netGs = []
    netDs = []
    parts = 6  #if opt.dataset == "CUB2011" else 6
    for part in range(parts):
        netGs.append(_netG(dataset.text_dim, 512).cuda().apply(weights_init))
        netDs.append(
            _netD(dataset.train_cls_num, 512).cuda().apply(weights_init))

    exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
    exp_params = 'Eu{}_Rls{}_RWz{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA,
                                           opt.REG_Wz_LAMBDA)

    out_dir = 'out/{:s}'.format(exp_info)
    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)
    if not os.path.exists('out'):
        os.mkdir('out')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    if not os.path.exists(out_subdir):
        os.mkdir(out_subdir)

    cprint(" The output directory is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}.txt'.format(exp_info)
    with open(log_dir, 'a') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0

    part_cls_centrild = torch.from_numpy(
        dataset.part_cls_centrild.astype('float32')).cuda()

    # initialize optimizers
    optimizerGs = []
    optimizerDs = []
    for netG in netGs:
        optimizerGs.append(
            optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9)))
    for netD in netDs:
        optimizerDs.append(
            optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9)))

    for it in range(start_step, 3000 + 1):
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array(
                [dataset.train_text_feature[i, :] for i in labels])
            text_feat = torch.from_numpy(text_feat.astype('float32')).cuda()
            X = torch.from_numpy(feat_data).cuda()
            y_true = torch.from_numpy(labels.astype('int')).cuda()
            z = torch.randn(opt.batchsize, param.z_dim).cuda()

            for part in range(parts):
                z = torch.randn(opt.batchsize, param.z_dim).cuda()
                D_real, C_real = netDs[part](X[:, part * 512:(part + 1) * 512])
                D_loss_real = torch.mean(D_real)
                C_loss_real = F.cross_entropy(C_real, y_true)
                DC_loss = -D_loss_real + C_loss_real
                DC_loss.backward()

                G_sample = netGs[part](z, text_feat)
                D_fake, C_fake = netDs[part](G_sample)
                D_loss_fake = torch.mean(D_fake)
                C_loss_fake = F.cross_entropy(C_fake, y_true)
                DC_loss = D_loss_fake + C_loss_fake
                DC_loss.backward()

                grad_penalty = calc_gradient_penalty(
                    netDs[part], X.data[:, part * 512:(part + 1) * 512],
                    G_sample.data)
                grad_penalty.backward()

                Wasserstein_D = D_loss_real - D_loss_fake
                # writer.add_scalar("Wasserstein_D"+str(part), Wasserstein_D.item(), it)

                optimizerDs[part].step()
                netGs[part].zero_grad()
                netDs[part].zero_grad()
        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array(
                [dataset.train_text_feature[i, :] for i in labels])
            text_feat = torch.from_numpy(text_feat.astype('float32')).cuda()

            X = torch.from_numpy(feat_data).cuda()
            y_true = torch.from_numpy(labels.astype('int')).cuda()

            for part in range(parts):
                z = torch.randn(opt.batchsize, param.z_dim).cuda()
                G_sample = netGs[part](z, text_feat)
                # G_sample_all[:, part*512:(part+1)*512] = G_sample
                D_fake, C_fake = netDs[part](G_sample)
                _, C_real = netDs[part](X[:, part * 512:(part + 1) * 512])

                G_loss = torch.mean(D_fake)
                C_loss = (F.cross_entropy(C_real, y_true) +
                          F.cross_entropy(C_fake, y_true)) / 2
                GC_loss = -G_loss + C_loss
                # writer.add_scalar("GC_loss"+str(part), GC_loss.item(), it)

                Euclidean_loss = torch.tensor([0.0]).cuda()
                if opt.REG_W_LAMBDA != 0:
                    for i in range(dataset.train_cls_num):
                        sample_idx = (y_true == i).data.nonzero().squeeze()
                        if sample_idx.numel() == 0:
                            Euclidean_loss += 0.0
                        else:
                            G_sample_cls = G_sample[sample_idx, :]
                            Euclidean_loss += (G_sample_cls.mean(dim=0) -
                                               part_cls_centrild[i][part]
                                               ).pow(2).sum().sqrt()
                    Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

                # ||W||_2 regularization
                reg_loss = torch.Tensor([0.0]).cuda()
                if opt.REG_W_LAMBDA != 0:

                    for name, p in netGs[part].named_parameters():
                        if 'weight' in name:
                            reg_loss += p.pow(2).sum()
                    reg_loss.mul_(opt.REG_W_LAMBDA)

                # writer.add_scalar("reg_loss"+str(part), reg_loss.item(), it)

                # ||W_z||21 regularization, make W_z sparse
                reg_Wz_loss = torch.Tensor([0.0]).cuda()
                if opt.REG_Wz_LAMBDA != 0:
                    Wz = netGs[part].rdc_text.weight
                    reg_Wz_loss = reg_Wz_loss + Wz.pow(2).sum(
                        dim=0).sqrt().sum().mul(opt.REG_Wz_LAMBDA)

                # writer.add_scalar("reg_Wz_loss"+str(part), reg_Wz_loss.item(), it)

                all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss
                all_loss.backward()
                optimizerGs[part].step()

        if it % opt.evl_interval == 0 and it >= 1000:
            print(it)
            for part in range(parts):
                netGs[part].eval()
            train_classifier(opt, param, dataset, netGs)
            for part in range(parts):
                netGs[part].train()
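
The chained `os.mkdir` calls in Examples #9 and #10 can be collapsed with `os.makedirs`, which Example #11 already uses. A behaviour-preserving one-liner, assuming the same `out_subdir` path:

import os
os.makedirs(out_subdir, exist_ok=True)  # creates 'out', out_dir and out_subdir as needed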
Example #10
def train():
    start_time = time.time()
    param = _param()
    dataset = LoadDataset(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.labels_train, dataset.pfc_feat_data_train, dataset.seen_label_mapping, opt)
    result = Result()
    result_gzsl = Result()
    netG = _netG(dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)

    print(netG)
    netD = _netD(dataset.train_cls_num + dataset.test_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
    exp_params = 'Eu{}_Rls{}_RWz{}'.format(opt.CENT_LAMBDA , opt.REG_W_LAMBDA, opt.REG_Wz_LAMBDA)

    out_dir  = 'out_' + str(opt.epsilon) + '/{:s}'.format(exp_info)
    out_subdir = 'out_' + str(opt.epsilon) + '/{:s}/{:s}'.format(exp_info, exp_params)

    if not os.path.exists('out_' + str(opt.epsilon) ):
        os.mkdir('out_' + str(opt.epsilon))
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    if not os.path.exists(out_subdir):
        os.mkdir(out_subdir)

    cprint(" The output directory is {}".format(out_subdir), 'red')
    log_dir  = out_subdir + '/log_{:s}.txt'.format(exp_info)
        
    with open(log_dir, 'a') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')
        f.write("Running Parameter Logs")
        f.write(runing_parameters_logs)
        
    start_step = 0

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
            log_text = checkpoint['log']
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    nets = [netG, netD]

    tr_cls_centroid = Variable(torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    
    for it in range(start_step, 5000+1):
        if it > opt.mode_change: 
            train_text = Variable(torch.from_numpy(dataset.train_text_feature.astype('float32'))).cuda()
            test_text = Variable(torch.from_numpy(dataset.test_text_feature.astype('float32'))).cuda()
            z_train = Variable(torch.randn(dataset.train_cls_num, param.z_dim)).cuda()
            z_test = Variable(torch.randn(dataset.test_cls_num, param.z_dim)).cuda()
            
            _, train_text_feature = netG(z_train, train_text) 
            _, test_text_feature = netG(z_test, test_text) 

            dataset.semantic_similarity_check(opt.Knn, train_text_feature.data.cpu().numpy(), test_text_feature.data.cpu().numpy())

        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']              # image data
            labels = blobs['labels'].astype(int)   # class labels
            true_labels = blobs['true_labels'].astype(int)

            text_feat = np.array([dataset.train_text_feature[i,:] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(true_labels.astype('int'))).cuda()
            
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # GAN's D loss
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = -D_loss_real + C_loss_real
            DC_loss.backward()

            # GAN's D loss on generated features
            G_sample, _ = netG(z, text_feat) 
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = D_loss_fake + C_loss_fake
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)
            
        """ Generator """
        for _ in range(1):            
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            true_labels = blobs['true_labels'].astype(int) #True seen label class 

            text_feat = np.array([dataset.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(true_labels.astype('int'))).cuda()
            y_dummy = Variable(torch.from_numpy(labels.astype('int'))).cuda()

            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            G_sample, _ = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _,      C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) + F.cross_entropy(C_fake, y_true))/2 

            GC_loss = -G_loss + C_loss
            
            # Centroid loss
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            Correlation_loss = Variable(torch.Tensor([0.0])).cuda()

            if opt.CENT_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_dummy == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:                        
                        G_sample_cls = G_sample[sample_idx, :]
                        if sample_idx.numel() != 1:
                            generated_mean = G_sample_cls.mean(dim=0) 
                        else:
                            generated_mean = G_sample_cls

                        Euclidean_loss += (generated_mean - tr_cls_centroid[i]).pow(2).sum().sqrt()

                        for n in range(dataset.Neighbours):                            
                            Neighbor_correlation = cosine_similarity(generated_mean.data.cpu().numpy().reshape((1, dataset.feature_dim)), 
                                                    tr_cls_centroid[dataset.idx_mat[i,n]].data.cpu().numpy().reshape((1, dataset.feature_dim)))

                            lower_limit = dataset.semantic_similarity_seen[i, n] - opt.epsilon
                            upper_limit = dataset.semantic_similarity_seen[i, n] + opt.epsilon

                            lower_limit = torch.as_tensor(lower_limit.astype('float'))
                            upper_limit = torch.as_tensor(upper_limit.astype('float'))
                            corr = torch.as_tensor(Neighbor_correlation[0][0].astype('float'))
                            margin = (torch.max(corr - corr, corr - upper_limit))**2 + (torch.max(corr - corr, lower_limit - corr))**2
                            Correlation_loss += margin
                                                
                Euclidean_loss *= 1.0/dataset.train_cls_num * opt.CENT_LAMBDA
                Correlation_loss = Correlation_loss * opt.correlation_penalty

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            # ||W_z||21 regularization, make W_z sparse
            reg_Wz_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_Wz_LAMBDA != 0:
                Wz = netG.rdc_text.weight
                reg_Wz_loss = Wz.pow(2).sum(dim=0).sqrt().sum().mul(opt.REG_Wz_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss + Correlation_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)

        if (it > opt.unseen_start):
            for _ in range(1):
                # Zero shot Discriminator is training 
                zero_shot_labels = np.random.randint(dataset.test_cls_num, size = opt.zeroshotbatchsize).astype(int)
                zero_shot_true_labels = np.array([dataset.unseen_label_mapping[i] for i in zero_shot_labels])
                zero_text_feat = np.array([dataset.test_text_feature[i,:] for i in zero_shot_labels])
                
                zero_text_feat = Variable(torch.from_numpy(zero_text_feat.astype('float32'))).cuda()
                zero_y_true = Variable(torch.from_numpy(zero_shot_true_labels.astype('int'))).cuda()
                z = Variable(torch.randn(opt.zeroshotbatchsize, param.z_dim)).cuda()

                # Zero-shot D update: classification loss on generated unseen-class features
                G_sample_zero, _ = netG(z, zero_text_feat)
                _, C_fake_zero = netD(G_sample_zero)
                C_loss_fake_zero = F.cross_entropy(C_fake_zero, zero_y_true)
                C_loss_fake_zero.backward()

                optimizerD.step()
                reset_grad(nets)
                
            for _ in range(1):
                # Zero shot Generator is training 
                zero_shot_labels = np.random.randint(dataset.test_cls_num, size = opt.zeroshotbatchsize).astype(int)
                zero_shot_true_labels = np.array([dataset.unseen_label_mapping[i] for i in zero_shot_labels])
                zero_text_feat = np.array([dataset.test_text_feature[i,:] for i in zero_shot_labels])
                
                zero_text_feat = Variable(torch.from_numpy(zero_text_feat.astype('float32'))).cuda()
                zero_y_true = Variable(torch.from_numpy(zero_shot_true_labels.astype('int'))).cuda()
                y_dummy_zero = Variable(torch.from_numpy(zero_shot_labels.astype('int'))).cuda()
                z = Variable(torch.randn(opt.zeroshotbatchsize, param.z_dim)).cuda()

                # Zero-shot G update: classification loss on generated unseen-class features
                G_sample_zero, _ = netG(z, zero_text_feat)
                _, C_fake_zero  = netD(G_sample_zero)
                C_loss_fake_zero = F.cross_entropy(C_fake_zero, zero_y_true)
                
                Correlation_loss_zero = Variable(torch.Tensor([0.0])).cuda()

                if opt.CENT_LAMBDA != 0:
                    for i in range(dataset.test_cls_num):
                        sample_idx = (y_dummy_zero == i).data.nonzero().squeeze()
                        if sample_idx.numel() != 0:
                            G_sample_cls = G_sample_zero[sample_idx, :]
                            
                            if sample_idx.numel() != 1:
                                generated_mean = G_sample_cls.mean(dim=0) 
                            else:
                                generated_mean = G_sample_cls

                            for n in range(dataset.Neighbours):                            
                                Neighbor_correlation = cosine_similarity(generated_mean.data.cpu().numpy().reshape((1, dataset.feature_dim)), 
                                                        tr_cls_centroid[dataset.unseen_idx_mat[i,n]].data.cpu().numpy().reshape((1, dataset.feature_dim)))
                                
                                lower_limit = dataset.semantic_similarity_unseen[i, n] - opt.epsilon
                                upper_limit = dataset.semantic_similarity_unseen[i, n] + opt.epsilon

                                lower_limit = torch.as_tensor(lower_limit.astype('float'))
                                upper_limit = torch.as_tensor(upper_limit.astype('float'))
                                corr = torch.as_tensor(Neighbor_correlation[0][0].astype('float'))

                                margin = (torch.max(corr - corr, corr - upper_limit))**2 + (torch.max(corr - corr, lower_limit - corr))**2
                    
                                Correlation_loss_zero += margin           

                    Correlation_loss_zero = Correlation_loss_zero * opt.correlation_penalty

                # ||W||_2 regularization
                reg_loss_zero = Variable(torch.Tensor([0.0])).cuda()
                if opt.REG_W_LAMBDA != 0:
                    for name, p in netG.named_parameters():
                        if 'weight' in name:
                            reg_loss_zero += p.pow(2).sum()
                    reg_loss_zero.mul_(opt.REG_W_LAMBDA)

                # ||W_z||21 regularization, make W_z sparse
                reg_Wz_loss_zero = Variable(torch.Tensor([0.0])).cuda()
                if opt.REG_Wz_LAMBDA != 0:
                    Wz = netG.rdc_text.weight
                    reg_Wz_loss_zero = Wz.pow(2).sum(dim=0).sqrt().sum().mul(opt.REG_Wz_LAMBDA)

                all_loss = C_loss_fake_zero +  reg_loss_zero + reg_Wz_loss_zero + Correlation_loss_zero 
                all_loss.backward()
                optimizerG.step()
                reset_grad(nets)
            
        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1) == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1) == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])
            
            log_text = 'Iter-{}; Was_D: {:.4}; Euc_ls: {:.4};reg_ls: {:.4}; Wz_ls: {:.4}; G_loss: {:.4}; Correlation_loss : {:.4} ; D_loss_real: {:.4};' \
                       ' D_loss_fake: {:.4}; rl: {:.4}%; fk: {:.4}%'.format(it, Wasserstein_D.item(),  Euclidean_loss.item(), reg_loss.item(), reg_Wz_loss.item(),
                                G_loss.item(), Correlation_loss.item() , D_loss_real.item(), D_loss_fake.item(), acc_real * 100, acc_fake * 100)
            
            log_text1 = ""
            if it > opt.unseen_start :
                acc_fake_zero = (np.argmax(C_fake_zero.data.cpu().numpy(), axis=1) == zero_y_true.data.cpu().numpy()).sum() / float(zero_y_true.data.size()[0])

                log_text1 = 'Zero_Shot_Iter-{}; Correlation_loss : {:.4}; fk: {:.4}%'.format(it,  
                                    Correlation_loss_zero.item(), acc_fake_zero * 100)
                
            '''
            Here I have added .item instead of the .data[0] - Maunil 
            '''
            
            print(log_text)
            print (log_text1)
            with open(log_dir, 'a') as f:
                f.write(log_text+'\n')
                f.write(log_text1+'\n')
                
        if it % opt.evl_interval == 0 and it >= 80 and log_text is not None:
            netG.eval()    # This will start the testing process, no batch norm and drop out - It will disable them
            eval_fakefeat_test(it, netG, netD, dataset, param, result)
            eval_fakefeat_GZSL(it, netG, dataset, param, result_gzsl)

            if result.save_model:
                files2remove = glob.glob(out_subdir + '/Best_model*')
                for _i in files2remove:
                    os.remove(_i)
                torch.save({
                    'it': it + 1,
                    'state_dict_G': netG.state_dict(),
                    'state_dict_D': netD.state_dict(),
                    'random_seed': opt.manualSeed,
                    'log': log_text,
                    'Zero Shot Acc' : result.acc_list[-1],
                    'Generalized Zero Shot Acc' :  result_gzsl.acc_list[-1]
                }, out_subdir + '/Best_model_Acc_' + str(result.acc_list[-1])  + '_AUC_' + str(result_gzsl.acc_list[-1])  + '_' +'.tar')
            netG.train()  

        if it % opt.save_interval == 0 and it:
            torch.save({
                    'it': it + 1,
                    'state_dict_G': netG.state_dict(),
                    'state_dict_D': netD.state_dict(),
                    'random_seed': opt.manualSeed,
                    'log': log_text,
                    'Zero Shot Acc' : result.acc_list[-1],
                    'Generalized Zero Shot Acc' : result_gzsl.acc_list[-1]
                },  out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it), 'red')

    print("########################################################")
    print("--- Training took %s seconds ---" % (time.time() - start_time))
    print("########################################################")
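
The two-sided correlation hinge in Example #10 builds a zero tensor via `corr - corr` inside `torch.max`. An equivalent, arguably clearer formulation of the same margin term using `torch.clamp` (a rewrite sketch, not code from the repository):

import torch

def correlation_margin(corr, lower_limit, upper_limit):
    # Penalize correlations that fall outside the [lower_limit, upper_limit] band.
    over = torch.clamp(corr - upper_limit, min=0.0) ** 2
    under = torch.clamp(lower_limit - corr, min=0.0) ** 2
    return over + under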
Example #11
def train(model_num=3,
          is_val=True,
          sim_func_number=None,
          creative_weight=None):
    param = _param(opt.z_dim)
    best_model_acc_path = best_model_auc_path = best_model_hm_path = ''
    if opt.dataset == 'CUB':
        dataset = LoadDataset(opt, main_dir, is_val)
        exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
        opt.is_gbu = False
    elif opt.dataset == 'NAB':
        dataset = LoadDataset_NAB(opt, main_dir, is_val)
        exp_info = 'NAB_EASY' if opt.splitmode == 'easy' else 'NAB_HARD'
        opt.is_gbu = False
    elif "GBU" in opt.dataset:
        opt.dataset = opt.dataset.split('_')[1]
        opt.is_gbu = True
        exp_info = opt.dataset
        dataset = LoadDataset_GBU(opt, main_dir, is_val)
    else:
        print('No Dataset with that name')
        sys.exit(0)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(np.array(dataset.train_label),
                               np.array(dataset.train_feature), opt)
    result = Result()

    ones = Variable(torch.Tensor(1, 1))
    ones.data.fill_(1.0)

    if opt.is_gbu:
        netG = _netG_att(param, dataset.text_dim, dataset.feature_dim).cuda()
    else:
        netG = _netG(dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)

    if model_num == 2 or model_num == 4:
        log_SM_ab = Scale(2)
        log_SM_ab = nn.DataParallel(log_SM_ab).cuda()
    if model_num == 3 or model_num == 4:
        netT = _netT(dataset.train_cls_num, dataset.feature_dim,
                     dataset.text_dim).cuda()
        netT.apply(weights_init)

    similarity_func = None
    if sim_func_number == 1:
        similarity_func = F.cosine_similarity
    elif sim_func_number == 2:
        similarity_func = F.mse_loss

    exp_params = 'Model_{}_is_val_{}_sim_func_number_{}_creative_weight_{}_lr_{}_zdim_{}_{}'.format(
        model_num, is_val, sim_func_number, creative_weight, opt.lr,
        param.z_dim, opt.exp_name)

    out_subdir = main_dir + 'out/{:s}/{:s}'.format(exp_info, exp_params)
    if not os.path.exists(out_subdir):
        os.makedirs(out_subdir)

    log_dir = out_subdir + '/log_{:s}.txt'.format(exp_info)
    log_dir_2 = out_subdir + '/log_{:s}_iterations.txt'.format(exp_info)
    with open(log_dir, 'a') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            if model_num == 3 or model_num == 4:
                netT.load_state_dict(checkpoint['state_dict_T'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    if model_num == 1:
        nets = [netG, netD]
    elif model_num == 2:
        nets = [netG, netD, log_SM_ab]
    elif model_num == 3:
        nets = [netG, netD, netT]
    elif model_num == 4:
        nets = [netG, netD, netT, log_SM_ab]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    if model_num == 2 or model_num == 4:
        optimizer_SM_ab = optim.Adam(log_SM_ab.parameters(),
                                     lr=opt.lr,
                                     betas=(0.5, 0.999))
    if model_num == 3 or model_num == 4:
        optimizerT = optim.Adam(netT.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    for it in tqdm(range(start_step, 5000 + 1)):
        blobs = data_layer.forward()
        labels = blobs['labels'].astype(int)
        new_class_labels = Variable(
            torch.from_numpy(np.ones_like(labels) *
                             dataset.train_cls_num)).cuda()
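        # Hallucinated ("creative") text features: two shuffled copies of the
        # batch's class descriptions are mixed with per-sample weights alpha
        # drawn from [0.2, 0.8], then L2-normalized.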
        text_feat_1 = np.array([dataset.train_att[i, :] for i in labels])
        text_feat_2 = np.array([dataset.train_att[i, :] for i in labels])
        np.random.shuffle(
            text_feat_1
        )  # Shuffle both features to guarantee different permutations
        np.random.shuffle(text_feat_2)
        alpha = (np.random.random(len(labels)) * (.8 - .2)) + .2

        text_feat_mean = np.multiply(alpha, text_feat_1.transpose())
        text_feat_mean += np.multiply(1. - alpha, text_feat_2.transpose())
        text_feat_mean = text_feat_mean.transpose()
        text_feat_mean = normalize(text_feat_mean, norm='l2', axis=1)
        text_feat_Creative = Variable(
            torch.from_numpy(text_feat_mean.astype('float32'))).cuda()
        # z_creative = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()
        # G_creative_sample = netG(z_creative, text_feat_Creative)

        if model_num == 3 or model_num == 4:
            """ Text Feat Generator """
            for _ in range(5):
                blobs = data_layer.forward()
                feat_data = blobs['data']  # image data
                labels = blobs['labels'].astype(int)  # class labels

                text_feat = np.array([dataset.train_att[i, :] for i in labels])
                text_feat_TG = Variable(
                    torch.from_numpy(text_feat.astype('float32'))).cuda()
                X = Variable(torch.from_numpy(feat_data)).cuda()
                y_true = Variable(torch.from_numpy(
                    labels.astype('int'))).cuda()
                z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

                # GAN's T loss
                T_real = netT(X)
                T_loss_real = torch.mean(similarity_func(text_feat_TG, T_real))

                # GAN's T loss
                G_sample = netG(z, text_feat_TG).detach()
                T_fake_TG = netT(G_sample)
                T_loss_fake = torch.mean(
                    similarity_func(text_feat_TG, T_fake_TG))

                # GAN's T loss
                G_sample_creative = netG(z, text_feat_Creative).detach()
                T_fake_creative_TG = netT(G_sample_creative)
                T_loss_fake_creative = torch.mean(
                    similarity_func(text_feat_Creative, T_fake_creative_TG))

                T_loss = -1 * T_loss_real - T_loss_fake - T_loss_fake_creative
                T_loss.backward()

                optimizerT.step()
                optimizerG.step()
                reset_grad(nets)
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels

            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # GAN's D loss
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = -D_loss_real + C_loss_real
            DC_loss.backward()

            # GAN's D loss
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)

            DC_loss = D_loss_fake + C_loss_fake
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)
        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2

            # GAN's G loss creative
            G_sample_creative = netG(z, text_feat_Creative).detach()

            if model_num == 3 or model_num == 4:
                D_creative_fake, _ = netD(G_sample_creative)
                G_loss_fake_creative = torch.mean(D_creative_fake)
                T_fake = netT(G_sample)
                T_loss_fake = torch.mean(similarity_func(text_feat, T_fake))

                T_fake_creative = netT(G_sample_creative)
                T_loss_fake_creative = torch.mean(
                    similarity_func(text_feat_Creative, T_fake_creative))

                GC_loss = -G_loss - G_loss_fake_creative + C_loss - T_loss_fake - T_loss_fake_creative
            else:
                GC_loss = -G_loss + C_loss

            # Centroid loss
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) -
                            tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            # ||W_z||21 regularization, make W_z sparse
            reg_Wz_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_Wz_LAMBDA != 0 and not opt.is_gbu:
                Wz = netG.rdc_text.weight
                reg_Wz_loss = Wz.pow(2).sum(dim=0).sqrt().sum().mul(
                    opt.REG_Wz_LAMBDA)

            if model_num == 2 or model_num == 4:
                # D(C| GX_fake)) + Classify GX_fake as real
                D_creative_fake, C_creative_fake = netD(G_sample_creative)
                G_fake_C = F.softmax(C_creative_fake, dim=1)
                # SM Divergence
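                # q_shape below is the uniform distribution over the seen
                # classes; SM_a and SM_b are learnable scalars squashed into
                # (0.2, 0.8) that set the order of the divergence between the
                # classifier posterior on hallucinated samples and q_shape.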
                q_shape = Variable(
                    torch.FloatTensor(G_fake_C.data.size(0),
                                      G_fake_C.data.size(1))).cuda()
                q_shape.data.fill_(1.0 / G_fake_C.data.size(1))

                SM_ab = F.sigmoid(log_SM_ab(ones))
                SM_a = 0.2 + torch.div(SM_ab[0][0], 1.6666666666666667).cuda()
                SM_b = 0.2 + torch.div(SM_ab[0][1], 1.6666666666666667).cuda()
                pow_a_b = torch.div(1 - SM_a, 1 - SM_b)
                alpha_term = (torch.pow(G_fake_C + 1e-5, SM_a) *
                              torch.pow(q_shape, 1 - SM_a)).sum(1)
                entropy_GX_fake_vec = torch.div(
                    torch.pow(alpha_term, pow_a_b) - 1, SM_b - 1)

                min_e, max_e = torch.min(entropy_GX_fake_vec), torch.max(
                    entropy_GX_fake_vec)
                entropy_GX_fake_vec = (entropy_GX_fake_vec - min_e) / (max_e -
                                                                       min_e)
                entropy_GX_fake = -entropy_GX_fake_vec.mean()
                loss_creative = -creative_weight * entropy_GX_fake

                disc_GX_fake_real = -torch.mean(D_creative_fake)
                total_loss_creative = loss_creative + disc_GX_fake_real

                all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss + total_loss_creative
            else:
                all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss

            all_loss.backward()

            if model_num == 2 or model_num == 4:
                optimizer_SM_ab.step()
            optimizerG.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])

            log_text = 'Iter-{}; rl: {:.4}%; fk: {:.4}%'.format(
                it, acc_real * 100, acc_fake * 100)
            with open(log_dir, 'a') as f:
                f.write(log_text + '\n')

        if it % opt.evl_interval == 0 and it > opt.disp_interval:
            cur_acc = 0
            cur_auc = 0
            cur_hm = 0

            netG.eval()
            if is_val:
                cur_acc = eval_fakefeat_test(netG, dataset.val_cls_num,
                                             dataset.val_att,
                                             dataset.val_unseen_feature,
                                             dataset.val_unseen_label, param,
                                             result)

                if opt.is_gbu:
                    cur_hm, acc_S_T, acc_U_T = eval_fakefeat_test_gzsl(
                        netG, dataset, dataset.val_cls_num, dataset.val_att,
                        dataset.val_unseen_feature, dataset.val_unseen_label,
                        param, result)

                else:
                    cur_auc = eval_fakefeat_GZSL(netG, dataset,
                                                 dataset.val_cls_num,
                                                 dataset.val_att,
                                                 dataset.val_unseen_feature,
                                                 dataset.val_unseen_label,
                                                 param, out_subdir, result)
            else:
                cur_acc = eval_fakefeat_test(netG, dataset.test_cls_num,
                                             dataset.test_att,
                                             dataset.test_unseen_feature,
                                             dataset.test_unseen_label, param,
                                             result)

                if opt.is_gbu:
                    cur_hm, acc_S_T, acc_U_T = eval_fakefeat_test_gzsl(
                        netG, dataset, dataset.test_cls_num, dataset.test_att,
                        dataset.test_unseen_feature, dataset.test_unseen_label,
                        param, result)

                else:
                    cur_auc = eval_fakefeat_GZSL(netG, dataset,
                                                 dataset.test_cls_num,
                                                 dataset.test_att,
                                                 dataset.test_unseen_feature,
                                                 dataset.test_unseen_label,
                                                 param, out_subdir, result)

            if cur_acc > result.best_acc:
                result.best_acc = cur_acc

                files2remove = glob.glob(out_subdir + '/Best_model_ACC*')
                for _i in files2remove:
                    os.remove(_i)

                save_dict = {
                    'it': it + 1,
                    'state_dict_G': netG.state_dict(),
                    'state_dict_D': netD.state_dict(),
                    'random_seed': opt.manualSeed,
                    'log': log_text,
                }

                if model_num == 3 or model_num == 4:
                    save_dict.update({'state_dict_T': netT.state_dict()})
                best_model_acc_path = '/Best_model_ACC_{:.2f}.tar'.format(
                    cur_acc)
                torch.save(save_dict, out_subdir + best_model_acc_path)

            if cur_auc > result.best_auc:
                result.best_auc = cur_auc

                files2remove = glob.glob(out_subdir + '/Best_model_AUC*')
                for _i in files2remove:
                    os.remove(_i)

                save_dict = {
                    'it': it + 1,
                    'state_dict_G': netG.state_dict(),
                    'state_dict_D': netD.state_dict(),
                    'random_seed': opt.manualSeed,
                    'log': log_text,
                }

                if model_num == 3 or model_num == 4:
                    save_dict.update({'state_dict_T': netT.state_dict()})
                best_model_auc_path = '/Best_model_AUC_{:.2f}.tar'.format(
                    cur_auc)
                torch.save(save_dict, out_subdir + best_model_auc_path)

            if cur_hm > result.best_hm:
                result.best_hm = cur_hm
                result.best_acc_S_T = acc_S_T
                result.best_acc_U_T = acc_U_T

                files2remove = glob.glob(out_subdir + '/Best_model_HM*')
                for _i in files2remove:
                    os.remove(_i)

                save_dict = {
                    'it': it + 1,
                    'state_dict_G': netG.state_dict(),
                    'state_dict_D': netD.state_dict(),
                    'random_seed': opt.manualSeed,
                    'log': log_text,
                }

                if model_num == 3 or model_num == 4:
                    save_dict.update({'state_dict_T': netT.state_dict()})
                best_model_hm_path = '/Best_model_HM_{:.2f}.tar'.format(cur_hm)
                torch.save(save_dict, out_subdir + best_model_hm_path)

            log_text_2 = 'iteration: %d, best_acc: %f, best_auc: %f, best_hm: %f' % (
                it, result.best_acc, result.best_auc, result.best_hm)
            with open(log_dir_2, 'a') as f:
                f.write(log_text_2 + '\n')
            netG.train()

    if is_val:
        if os.path.isfile(out_subdir + best_model_acc_path):
            print("=> loading checkpoint '{}'".format(best_model_acc_path))
            checkpoint = torch.load(out_subdir + best_model_acc_path)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            if model_num == 3 or model_num == 4:
                netT.load_state_dict(checkpoint['state_dict_T'])
            it = checkpoint['it']
            print("iteration: {}".format(it))

            netG.eval()
            test_acc = eval_fakefeat_test(netG, dataset.test_cls_num,
                                          dataset.test_att,
                                          dataset.test_unseen_feature,
                                          dataset.test_unseen_label, param,
                                          result)

            result.test_acc = test_acc
        else:
            print("=> no checkpoint found at '{}'".format(out_subdir +
                                                          best_model_acc_path))

        if os.path.isfile(out_subdir + best_model_auc_path):
            print("=> loading checkpoint '{}'".format(best_model_auc_path))
            checkpoint = torch.load(out_subdir + best_model_auc_path)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            if model_num == 3 or model_num == 4:
                netT.load_state_dict(checkpoint['state_dict_T'])
            it = checkpoint['it']
            print("iteration: {}".format(it))

            netG.eval()
            test_auc = eval_fakefeat_GZSL(netG, dataset, dataset.test_cls_num,
                                          dataset.test_att,
                                          dataset.test_unseen_feature,
                                          dataset.test_unseen_label, param,
                                          out_subdir, result)

            result.test_auc = test_auc
        else:
            print("=> no checkpoint found at '{}'".format(out_subdir +
                                                          best_model_auc_path))

        if os.path.isfile(out_subdir + best_model_hm_path):
            print("=> loading checkpoint '{}'".format(best_model_hm_path))
            checkpoint = torch.load(out_subdir + best_model_hm_path)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            if model_num == 3 or model_num == 4:
                netT.load_state_dict(checkpoint['state_dict_T'])
            it = checkpoint['it']
            print("iteration: {}".format(it))

            netG.eval()
            test_hm, test_acc_S_T, test_acc_U_T = eval_fakefeat_test_gzsl(
                netG, dataset, dataset.test_cls_num, dataset.test_att,
                dataset.test_unseen_feature, dataset.test_unseen_label, param,
                result)

            result.test_hm = test_hm
            result.test_acc_S_T = test_acc_S_T
            result.test_acc_U_T = test_acc_U_T
        else:
            print("=> no checkpoint found at '{}'".format(out_subdir +
                                                          best_model_hm_path))

        log_text_2 = 'test_acc: %f, test_auc: %f, test_hm: %f, test_acc_S_T: %f, test_acc_U_T: %f' % (
            result.test_acc, result.test_auc, result.test_hm,
            result.test_acc_S_T, result.test_acc_U_T)
        with open(log_dir_2, 'a') as f:
            f.write(log_text_2 + '\n')

    return result
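The training loops above call reset_grad and calc_gradient_penalty but neither helper is shown in these snippets. A minimal sketch of what they usually look like in WGAN-GP code of this kind (the bodies and the lambda_gp weight are assumptions; only the names and call signatures are taken from the snippets):

import torch
import torch.autograd as autograd


def reset_grad(nets):
    # Zero the gradients of every network in the list.
    for net in nets:
        net.zero_grad()


def calc_gradient_penalty(netD, real_data, fake_data, lambda_gp=10.0):
    # WGAN-GP penalty: interpolate between real and fake features and push
    # the critic's gradient norm at the interpolates towards 1.
    batch_size = real_data.size(0)
    alpha = torch.rand(batch_size, 1, device=real_data.device)
    alpha = alpha.expand_as(real_data)

    interpolates = (alpha * real_data +
                    (1.0 - alpha) * fake_data).requires_grad_(True)
    disc_interpolates, _ = netD(interpolates)

    gradients = autograd.grad(outputs=disc_interpolates,
                              inputs=interpolates,
                              grad_outputs=torch.ones_like(disc_interpolates),
                              create_graph=True,
                              retain_graph=True,
                              only_inputs=True)[0]
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean() * lambda_gp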
Example #12
    def __init__(self, opt, nclasses, mean, std, source_trainloader,
                 source_valloader, targetloader, class_balance, augment):

        self.source_trainloader = source_trainloader
        self.source_valloader = source_valloader
        self.targetloader = targetloader
        self.opt = opt
        self.mean = mean
        self.std = std
        self.best_val = 0

        # Defining networks and optimizers
        self.nclasses = nclasses
        self.netG = models._netG(opt, nclasses)
        self.netD = models._netD(opt, nclasses)
        self.netF = models._netF(opt)
        self.netC = models._netC(opt, nclasses)

        # Weight initialization
        self.netG.apply(utils.weights_init)
        self.netD.apply(utils.weights_init)
        self.netF.apply(utils.weights_init)
        self.netC.apply(utils.weights_init)

        # Defining loss criterions
        self.criterion_c = nn.CrossEntropyLoss()
        self.criterion_s = nn.BCELoss()

        if opt.gpu >= 0:
            self.netD.cuda()
            self.netG.cuda()
            self.netF.cuda()
            self.netC.cuda()
            self.criterion_c.cuda()
            self.criterion_s.cuda()

        # Defining optimizers
        self.optimizerD = optim.Adam(self.netD.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))
        self.optimizerG = optim.Adam(self.netG.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))
        self.optimizerF = optim.Adam(self.netF.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))
        self.optimizerC = optim.Adam(self.netC.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))

        # Other variables
        self.real_label_val = 1
        self.fake_label_val = 0

        self.augment = augment
        self.class_balance = class_balance
        self.uniform_cls_distribution = torch.ones(self.nclasses) * float(
            1.0 / self.nclasses)
        self.cls_bal_fn = robust_binary_crossentropy
        if self.opt.gpu >= 0:
            self.uniform_cls_distribution = self.uniform_cls_distribution.cuda()
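This snippet binds self.cls_bal_fn to robust_binary_crossentropy without showing it. A sketch of one common formulation of that helper, assuming it is the clamped element-wise BCE typically used for class-balance terms (the exact body is not from this repository):

import torch


def robust_binary_crossentropy(pred, tgt, eps=1e-6):
    # Element-wise binary cross-entropy with the predictions nudged away from
    # 0 and 1 so the log terms stay finite when pushing predicted class
    # frequencies towards the uniform distribution.
    inv_tgt = 1.0 - tgt
    inv_pred = 1.0 - pred + eps
    return -(tgt * torch.log(pred + eps) + inv_tgt * torch.log(inv_pred))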
Example #13
def train():
    param = _param()
    dataset = LoadDataset(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.labels_train, dataset.pfc_feat_data_train, opt)
    result = Result()

    netG = _netG(dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
    exp_params = 'Eu{}_Rls{}_RWz{}'.format(opt.CENT_LAMBDA , opt.REG_W_LAMBDA, opt.REG_Wz_LAMBDA)

    out_dir  = 'out/{:s}'.format(exp_info)
    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)
    if not os.path.exists('out'):
        os.mkdir('out')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    if not os.path.exists(out_subdir):
        os.mkdir(out_subdir)

    cprint(" The output dictionary is {}".format(out_subdir), 'red')
    log_dir  = out_subdir + '/log_{:s}.txt'.format(exp_info)
    with open(log_dir, 'a') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    nets = [netG, netD]

    tr_cls_centroid = Variable(torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    for it in range(start_step, 3000+1):
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']             # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_text_feature[i,:] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # GAN's D loss
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = -D_loss_real + C_loss_real
            DC_loss.backward()

            # GAN's D loss
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = D_loss_fake + C_loss_fake
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)

        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _,      C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) + F.cross_entropy(C_fake, y_true))/2

            GC_loss = -G_loss + C_loss

            # Centroid loss
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (G_sample_cls.mean(dim=0) - tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0/dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            # ||W_z||21 regularization, make W_z sparse
            reg_Wz_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_Wz_LAMBDA != 0:
                Wz = netG.rdc_text.weight
                reg_Wz_loss = Wz.pow(2).sum(dim=0).sqrt().sum().mul(opt.REG_Wz_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1) == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1) == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])

            log_text = 'Iter-{}; Was_D: {:.4}; Euc_ls: {:.4}; reg_ls: {:.4}; Wz_ls: {:.4}; G_loss: {:.4}; D_loss_real: {:.4};' \
                       ' D_loss_fake: {:.4}; rl: {:.4}%; fk: {:.4}%'\
                        .format(it, Wasserstein_D.data[0],  Euclidean_loss.data[0], reg_loss.data[0],reg_Wz_loss.data[0],
                                G_loss.data[0], D_loss_real.data[0], D_loss_fake.data[0], acc_real * 100, acc_fake * 100)
            print(log_text)
            with open(log_dir, 'a') as f:
                f.write(log_text+'\n')

        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()
            eval_fakefeat_test(it, netG, dataset, param, result)
            if result.save_model:
                files2remove = glob.glob(out_subdir + '/Best_model*')
                for _i in files2remove:
                    os.remove(_i)
                torch.save({
                    'it': it + 1,
                    'state_dict_G': netG.state_dict(),
                    'state_dict_D': netD.state_dict(),
                    'random_seed': opt.manualSeed,
                    'log': log_text,
                }, out_subdir + '/Best_model_Acc_{:.2f}.tar'.format(result.acc_list[-1]))
            netG.train()

        if it % opt.save_interval == 0 and it:
            torch.save({
                    'it': it + 1,
                    'state_dict_G': netG.state_dict(),
                    'state_dict_D': netD.state_dict(),
                    'random_seed': opt.manualSeed,
                    'log': log_text,
                },  out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it), 'red')
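FeatDataLayer is used in this and the surrounding snippets purely as a minibatch sampler over precomputed features, with forward() returning a blob dict. A minimal stand-in consistent with those calls (the real implementation is not shown; this is an assumption based on how it is used):

import numpy as np


class FeatDataLayer(object):
    # Minimal stand-in: samples random minibatches of precomputed features.
    def __init__(self, labels, feat_data, opt):
        self._labels = np.asarray(labels)
        self._feat_data = np.asarray(feat_data, dtype='float32')
        self._batchsize = opt.batchsize

    def forward(self):
        idx = np.random.permutation(len(self._labels))[:self._batchsize]
        return {'data': self._feat_data[idx], 'labels': self._labels[idx]}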
Example #14
    def __init__(self, opt, nclasses, mean, std, source_trainloader,
                 source_valloader, target_trainloader, target_valloader,
                 res_dir):

        self.source_trainloader = source_trainloader
        self.source_valloader = source_valloader
        self.target_trainloader = target_trainloader
        self.target_valloader = target_valloader
        self.opt = opt
        self.best_val = 0

        # Defining networks and optimizers
        self.nclasses = nclasses
        self.netG = models._netG(opt, nclasses, flattens=opt.flattens)
        self.netD = models._netD(opt, nclasses)
        self.netF = models._netF(opt)
        self.netC = models._netC(opt, nclasses, flattens=opt.flattens)

        # Weight initialization
        self.netG.apply(utils.weights_init)
        self.netD.apply(utils.weights_init)
        self.netF.apply(utils.weights_init)
        self.netC.apply(utils.weights_init)

        logging.basicConfig(filename='{}/app.log'.format(res_dir),
                            level=logging.DEBUG,
                            format='%(asctime)s:%(levelname)s:%(message)s')

        if True:
            print('netG<<')
            print(self.netG)
            logging.debug(self.netG)
            print('>>\n')
            print('netD<<')
            print(self.netD)
            logging.debug(self.netD)
            print('>>\n')
            print('netF<<')
            print(self.netF)
            logging.debug(self.netF)
            print('>>\n')
            print('netC<<')
            print(self.netC)
            logging.debug(self.netC)
            print('>>')

        # Defining loss criterions
        self.criterion_c = nn.CrossEntropyLoss()
        self.criterion_s = nn.BCELoss()

        self.mmd_loss = MMD_loss()
        self.mse_loss = nn.MSELoss()

        if opt.gpu >= 0:
            self.netD.cuda()
            self.netG.cuda()
            self.netF.cuda()
            self.netC.cuda()
            self.criterion_c.cuda()
            self.criterion_s.cuda()

        # Defining optimizers
        self.optimizerD = optim.Adam(self.netD.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))
        self.optimizerG = optim.Adam(self.netG.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))
        self.optimizerF = optim.Adam(self.netF.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))
        self.optimizerC = optim.Adam(self.netC.parameters(),
                                     lr=opt.lr,
                                     betas=(opt.beta1, 0.999))

        # Other variables
        self.real_label_val = 1
        self.fake_label_val = 0
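MMD_loss() above is instantiated next to the usual criterions but its definition is not included. A compact sketch of a Gaussian-kernel MMD estimator with that interface, assuming the loss compares a source and a target feature batch (the bandwidth and the single-kernel choice are assumptions):

import torch
import torch.nn as nn


class MMD_loss(nn.Module):
    # Squared MMD between two feature batches with a Gaussian (RBF) kernel.
    def __init__(self, bandwidth=1.0):
        super(MMD_loss, self).__init__()
        self.bandwidth = bandwidth

    def _kernel(self, a, b):
        dist2 = torch.cdist(a, b).pow(2)
        return torch.exp(-dist2 / (2.0 * self.bandwidth ** 2))

    def forward(self, source, target):
        k_ss = self._kernel(source, source).mean()
        k_tt = self._kernel(target, target).mean()
        k_st = self._kernel(source, target).mean()
        return k_ss + k_tt - 2.0 * k_st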
Example #15
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


netG = _netG(ngpu, nz, ngf, nc)
netG.apply(weights_init)
if opt.netG != '':
    netG.load_state_dict(torch.load(opt.netG))
print(netG)

netD = _netD(ngpu, nc, ndf)
netD.apply(weights_init)
if opt.netD != '':
    netD.load_state_dict(torch.load(opt.netD))
print(netD)

criterion = nn.BCELoss()

input = torch.FloatTensor(opt.batchSize, 3, opt.imageSize, opt.imageSize)
noise = torch.FloatTensor(opt.batchSize, nz, 1, 1)
fixed_noise = torch.FloatTensor(opt.batchSize, nz, 1, 1).normal_(0, 1)
label = torch.FloatTensor(opt.batchSize)
real_label = 1
fake_label = 0

if opt.cuda:
Example #16
def train():
    param = _param()
    print("load dataset origin")
    dataset_origin = LoadDataset_origin(opt)
    print("load dataset")
    dataset = LoadDataset(opt)
    param.X_dim = dataset.feature_dim

    data_layer_origin = FeatDataLayer_origin(
        dataset_origin.labels_train, dataset_origin.pfc_feat_data_train, opt)
    data_layer = FeatDataLayer_add_FG(
        dataset.labels_train, dataset.pfc_feat_data_train, opt,
        dataset.train_text_feature, dataset.familyToText, dataset.genusToText,
        dataset.familyLabelToBirdLabel, dataset.genusLabelToBirdLabel,
        dataset.labels_origin_train)
    result = Result()
    result_gzsl = Result()
    netG = _netG(dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
    exp_params = 'Eu{}_Rls{}_RWz{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA,
                                           opt.REG_Wz_LAMBDA)

    out_dir = 'out/{:s}'.format(exp_info)
    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)
    opt.out_subdir = out_subdir

    if not os.path.exists('out'):
        os.mkdir('out')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    if not os.path.exists(out_subdir):
        os.mkdir(out_subdir)

    cprint(" The output dictionary is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}'.format(exp_info)

    if opt.exp_no != "":
        log_dir += "_" + opt.exp_no
    log_dir += ".txt"

    opt.log_dir = log_dir
    opt.auc_plot_dir = out_subdir + '/best_auc_plot{:s}_{:s}'.format(
        opt.exp_no, exp_info)

    opt.auc_solid_plot_dir = out_subdir + '/solid_auc_plot{:s}_{:s}'.format(
        opt.exp_no, exp_info)

    opt.history_D_loss_dir = out_subdir + '/D_loss_plot{:s}_{:s}'.format(
        opt.exp_no, exp_info)
    opt.history_G_loss_dir = out_subdir + '/G_loss_plot{:s}_{:s}'.format(
        opt.exp_no, exp_info)

    start_step = 0

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    nets = [netG, netD]

    # tr_cls_centroid = Variable(torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    history_D_loss = []
    history_G_loss = []
    for it in range(start_step, 10000 + 1):
        cur_D_loss = 0
        cur_G_loss = 0
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer_origin.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels

            text_feat = np.array(
                [dataset_origin.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()
            y_true = y_true.to(device=device, dtype=torch.long)
            # GAN's D loss
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = -D_loss_real + C_loss_real
            DC_loss.backward()
            cur_D_loss += DC_loss.item()

            # GAN's D loss
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = D_loss_fake + C_loss_fake
            DC_loss.backward()
            cur_D_loss += DC_loss.item()
            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)
            cur_D_loss += Wasserstein_D.item()
        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)
            origin_labels = blobs['minibatch_origin_label'].astype(int)
            text_feat = blobs['text_feat']  # text_feat
            # text_feat = np.array([dataset.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            y_origin_true = Variable(
                torch.from_numpy(origin_labels.astype('int'))).cuda()
            y_true = y_true.to(device=device, dtype=torch.long)
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2

            GC_loss = -G_loss + C_loss

            cur_G_loss += -G_loss.item() + F.cross_entropy(C_fake,
                                                           y_true).item()
            Bird_Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.CENT_LAMBDA != 0 and opt.BIRD_CENT_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_origin_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Bird_Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        center = Variable(
                            torch.from_numpy(dataset.tr_cls_centroid[i].astype(
                                'float32'))).cuda()
                        Bird_Euclidean_loss += (G_sample_cls.mean(dim=0) -
                                                center).pow(2).sum().sqrt()
                Bird_Euclidean_loss *= 1.0 / dataset.train_cls_num

            Family_Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.CENT_LAMBDA != 0 and opt.FAMILY_CENT_LAMBDA != 0:
                for i in range(dataset.familyLabelStart,
                               dataset.familyLabelEnd):
                    sample_idx = (y_origin_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Family_Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        center = Variable(
                            torch.from_numpy(dataset.tr_cls_centroid[i].astype(
                                'float32'))).cuda()
                        Family_Euclidean_loss += (G_sample_cls.mean(dim=0) -
                                                  center).pow(2).sum().sqrt()
                Family_Euclidean_loss *= 1.0 / (dataset.familyLabelEnd -
                                                dataset.familyLabelStart)

            Genus_Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.CENT_LAMBDA != 0 and opt.GENUS_CENT_LAMBDA != 0:
                for i in range(dataset.genusLabelStart, dataset.genusLabelEnd):
                    sample_idx = (y_origin_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Genus_Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        center = Variable(
                            torch.from_numpy(dataset.tr_cls_centroid[i].astype(
                                'float32'))).cuda()
                        Genus_Euclidean_loss += (G_sample_cls.mean(dim=0) -
                                                 center).pow(2).sum().sqrt()
                Genus_Euclidean_loss *= 1.0 / (dataset.genusLabelEnd -
                                               dataset.genusLabelStart)

            Euclidean_loss = opt.CENT_LAMBDA * (
                opt.BIRD_CENT_LAMBDA * Bird_Euclidean_loss +
                opt.FAMILY_CENT_LAMBDA * Family_Euclidean_loss +
                opt.GENUS_CENT_LAMBDA * Genus_Euclidean_loss)

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            # ||W_z||21 regularization, make W_z sparse
            reg_Wz_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_Wz_LAMBDA != 0:
                Wz = netG.rdc_text.weight
                reg_Wz_loss = Wz.pow(2).sum(dim=0).sqrt().sum().mul(
                    opt.REG_Wz_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)

            cur_G_loss += Euclidean_loss.item()
        history_D_loss.append(cur_D_loss)
        history_G_loss.append(cur_G_loss)
        print("Iter-" + str(it + 1) + "; G-loss: " + str(cur_G_loss) +
              "; D-loss: " + str(cur_D_loss))

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])

            log_text = 'Iter-{}; Was_D: {:.4}; Euc_ls: {:.4}; Bird_Euc_ls: {:.4}; family_Euc_ls: {:.4}; ' \
                       'Genus_Euc_ls: {:.4}; reg_ls: {:.4}; Wz_ls: {:.4}; G_loss: {:.4}; D_loss_real: {:.4};' \
                       ' D_loss_fake: {:.4}; rl: {:.4}%; fk: {:.4}%' \
                .format(it,
                        Wasserstein_D.item(),
                        Euclidean_loss.item(),
                        Bird_Euclidean_loss.item(),
                        Family_Euclidean_loss.item(),
                        Genus_Euclidean_loss.item(),
                        reg_loss.item(),
                        reg_Wz_loss.item(),
                        G_loss.item(),
                        D_loss_real.item(),
                        D_loss_fake.item(),
                        acc_real * 100, acc_fake * 100)
            print(log_text)

        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()
            eval_fakefeat_test(it, netG, dataset_origin, param, result)
            eval_fakefeat_GZSL(it, netG, dataset_origin, param, result_gzsl)
            if result.save_model:
                files2remove = glob.glob(
                    out_subdir + '/Best_model{}_Acc*'.format(opt.exp_no))
                for _i in files2remove:
                    os.remove(_i)
                torch.save(
                    {
                        'it': it + 1,
                        'state_dict_G': netG.state_dict(),
                        'state_dict_D': netD.state_dict(),
                        'random_seed': opt.manualSeed,
                        'log': log_text,
                    }, out_subdir + '/Best_model{}_Acc_{:.2f}.tar'.format(
                        opt.exp_no, result.acc_list[-1]))

            if result_gzsl.save_model:
                files2remove = glob.glob(
                    out_subdir + '/Best_model{}_Auc*'.format(opt.exp_no))
                for _i in files2remove:
                    os.remove(_i)
                torch.save(
                    {
                        'it': it + 1,
                        'state_dict_G': netG.state_dict(),
                        'state_dict_D': netD.state_dict(),
                        'random_seed': opt.manualSeed,
                        'log': log_text,
                    }, out_subdir + '/Best_model{}_Auc_{:.2f}.tar'.format(
                        opt.exp_no, result_gzsl.best_auc * 100))

            netG.train()

        if it % opt.save_interval == 0 and it:
            torch.save(
                {
                    'it': it + 1,
                    'state_dict_G': netG.state_dict(),
                    'state_dict_D': netD.state_dict(),
                    'random_seed': opt.manualSeed,
                    'log': log_text,
                }, out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it),
                   'red')

    print("Reproduce CUB {}".format(opt.splitmode))
    print("Accuracy is {:.4}%, and Generalized AUC is {:.4}%".format(
        result.best_acc, result_gzsl.best_auc * 100))

    np.savetxt(opt.history_D_loss_dir + '.txt', history_D_loss, fmt='%.015f')
    np.savetxt(opt.history_G_loss_dir + '.txt', history_G_loss, fmt='%.015f')
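The per-level centroid losses above read dataset.tr_cls_centroid, the mean training feature of each class. A short sketch of how such centroids are typically precomputed (the function name is made up for illustration; only the attribute it would fill is taken from the snippet):

import numpy as np


def compute_class_centroids(features, labels, n_classes):
    # Mean feature vector per class; rows for classes with no samples stay zero.
    centroids = np.zeros((n_classes, features.shape[1]), dtype='float32')
    for c in range(n_classes):
        mask = labels == c
        if mask.any():
            centroids[c] = features[mask].mean(axis=0)
    return centroids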
Example #17
                                     batch_size=batch_size,
                                     shuffle=True,
                                     num_workers=4)
    print('data is ready!')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    vi_fea_dim = zsl_dataset.vis_fea_dim
    se_fea_dim = zsl_dataset.sem_fea_dim
    n_tr_class = zsl_dataset.n_tr_class
    z_dim = args.z_dim
    if args.use_z.lower() == 'true':
        netG = _netG(se_fea_dim, vi_fea_dim, z_dim).to(device)
    else:
        netG = _netG2(se_fea_dim, vi_fea_dim).to(device)
    netD = _netD(vi_fea_dim, n_tr_class).to(device)
    netR = Regressor(vi_fea_dim, se_fea_dim).to(device)
    nets = [netG, netD, netR]
    nets_weights_init(nets)
    print_nets(nets)

    te_data_unseen, te_data_seen = zsl_dataset.get_testData()
    te_vis_fea_unseen, te_sem_fea_unseen, te_label_unseen, te_labelID_unseen, te_sem_fea_pro_unseen = te_data_unseen
    te_vis_fea_seen, te_sem_fea_seen, te_label_seen, te_labelID_seen, te_sem_fea_pro_seen = te_data_seen
    tr_vis_fea, tr_sem_fea, all_tr_label, tr_labelID, tr_sem_fea_pro = zsl_dataset.get_trainData()

    tr_cls_centroid = zsl_dataset.get_tr_centroid()
    tr_cls_centroid = torch.from_numpy(tr_cls_centroid).to(device)

    which_optimizer = args.optimizer.lower()
Example #18
    def __init__(self, args):

        self.args = args
        Path(args.saver_root).mkdir(parents=True, exist_ok=True)

        if args.exp == 'MNIST':
            self.log('Running MNIST -> MNIST-M')
            dataloders = datasets.form_mnist_dataset(args)
        elif args.exp == 'VISDA':
            # TODO: Include VISDA
            pass

        self.s_trainloader = dataloders['s_train']
        self.s_valloader = dataloders['s_val']
        self.t_trainloader = dataloders['t_train']
        self.t_valloader = dataloders['t_val']
        self.s_trainloader_classwise = dataloders['s_classwise']
        nclasses = self.nclasses = dataloders['nclasses']

        self.s_classwise_iterators = []
        for i in range(len(self.s_trainloader_classwise)):
            self.s_classwise_iterators.append(
                iter(self.s_trainloader_classwise[i]))

        ###############################
        # Create models
        self.netF = models._netF().cuda()
        self.netC = models._netC(self.nclasses).cuda()
        if args.alg == 'wasserstein' or args.alg == 'NW':
            self.netD = models._netD_wasserstein().cuda()
        else:
            self.netD = models._netD().cuda()

        # Create optimizers
        if args.adam:
            self.optimizerF = optim.Adam(self.netF.parameters(),
                                         lr=args.lr,
                                         betas=(0.5, 0.999))
            self.optimizerC = optim.Adam(self.netC.parameters(),
                                         lr=args.lr,
                                         betas=(0.5, 0.999))
            self.optimizerD = optim.Adam(self.netD.parameters(),
                                         lr=args.lr,
                                         betas=(0.5, 0.999))
            if args.alg == 'NW':
                self.pi = nn.Parameter(
                    torch.FloatTensor(nclasses).fill_(1.0 / nclasses).cuda())
                self.optimizerPi = optim.Adam(iter([self.pi]),
                                              lr=args.lrPi,
                                              betas=(0.5, 0.999))
        else:
            self.optimizerF = optim.SGD(self.netF.parameters(),
                                        lr=args.lr,
                                        momentum=0.9)
            self.optimizerC = optim.SGD(self.netC.parameters(),
                                        lr=args.lr,
                                        momentum=0.9)
            self.optimizerD = optim.SGD(self.netD.parameters(),
                                        lr=args.lr,
                                        momentum=0.9)
            if args.alg == 'NW':
                self.pi = nn.Parameter(
                    torch.FloatTensor(nclasses).fill_(1.0 / nclasses).cuda())
                self.optimizerPi = optim.SGD(iter([self.pi]), lr=args.lrPi)
Example #19
    def __init__(self, opt, nclasses, mean, std, source_trainloader, source_valloader, target_trainloader, target_valloader):

        self.source_trainloader = source_trainloader
        self.source_valloader = source_valloader
        self.target_trainloader = target_trainloader
        self.target_valloader = target_valloader
        self.opt = opt
        self.mean = mean
        self.std = std
        self.best_val = 0
        
        # Defining networks and optimizers
        self.nclasses = nclasses
        self.netG = models._netG(opt, nclasses)
        self.netD = models._netD(opt, nclasses)
        self.netF = models._netF(opt)
        self.netC = models._netC(opt, nclasses)

        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            self.netG = nn.DataParallel(self.netG)
            self.netD = nn.DataParallel(self.netD)
            self.netF = nn.DataParallel(self.netF)
            self.netC = nn.DataParallel(self.netC)

        # Weight initialization
        self.netG.apply(utils.weights_init)
        self.netD.apply(utils.weights_init)
        self.netF.apply(utils.weights_init)
        self.netC.apply(utils.weights_init)

        if opt.loadExisting != 0: 

            netF_path = os.path.join(opt.checkpoint_dir, 'model_best_netF_sourceonly.pth')
            netC_path = os.path.join(opt.checkpoint_dir, 'model_best_netC_sourceonly.pth')

            netG_path = os.path.join(opt.checkpoint_dir, 'model_best_netG.pth')
            netD_path = os.path.join(opt.checkpoint_dir, 'model_best_netD.pth')
            if os.path.isfile(netF_path):
                self.netF.load_state_dict(torch.load(netF_path))
            if os.path.isfile(netC_path):
                self.netC.load_state_dict(torch.load(netC_path))
            if os.path.isfile(netG_path):
                self.netG.load_state_dict(torch.load(netG_path))
            if os.path.isfile(netD_path):
                self.netD.load_state_dict(torch.load(netD_path))        


        # Defining loss criterions
        self.criterion_c = nn.CrossEntropyLoss()
        self.criterion_s = nn.BCELoss()

        if opt.gpu>=0:
            self.netD.cuda()
            self.netG.cuda()
            self.netF.cuda()
            self.netC.cuda()
            self.criterion_c.cuda()
            self.criterion_s.cuda()

        # Defining optimizers
        self.optimizerD = optim.Adam(self.netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerG = optim.Adam(self.netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerF = optim.Adam(self.netF.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizerC = optim.Adam(self.netC.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

        # Other variables
        self.real_label_val = 1
        self.fake_label_val = 0
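This snippet wraps the networks in nn.DataParallel before calling load_state_dict, so a checkpoint saved from an unwrapped model (keys without the 'module.' prefix) would fail to load as-is. A small helper that tolerates either layout (a sketch, not part of the original code):

def load_flexible_state_dict(model, state_dict):
    # Strip or add the DataParallel 'module.' prefix so a checkpoint saved
    # with or without the wrapper can be loaded into `model`.
    wants_prefix = next(iter(model.state_dict())).startswith('module.')
    fixed = {}
    for key, value in state_dict.items():
        has_prefix = key.startswith('module.')
        if wants_prefix and not has_prefix:
            key = 'module.' + key
        elif not wants_prefix and has_prefix:
            key = key[len('module.'):]
        fixed[key] = value
    model.load_state_dict(fixed)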
Example #20
def train(creative_weight=1000, model_num=1, is_val=True):
    param = _param()
    if opt.dataset == 'CUB':
        dataset = LoadDataset(opt, main_dir, is_val)
        exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
    elif opt.dataset == 'NAB':
        dataset = LoadDataset_NAB(opt, main_dir, is_val)
        exp_info = 'NAB_EASY' if opt.splitmode == 'easy' else 'NAB_HARD'
    else:
        print('No Dataset with that name')
        sys.exit(0)
    param.X_dim = dataset.feature_dim
    opt.Creative_weight = creative_weight

    data_layer = FeatDataLayer(dataset.labels_train,
                               dataset.pfc_feat_data_train, opt)
    result = Result()

    ones = Variable(torch.Tensor(1, 1))
    ones.data.fill_(1.0)

    netG = _netG(dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    if model_num == 6:
        netD = _netD(dataset.train_cls_num + 1, dataset.feature_dim).cuda()
    else:
        netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)

    if model_num == 2:
        log_SM_ab = Scale(2)
        log_SM_ab = nn.DataParallel(log_SM_ab).cuda()
    elif model_num == 4 or model_num == 5:
        log_SM_ab = Scale(1)
        log_SM_ab = nn.DataParallel(log_SM_ab).cuda()

    exp_params = 'Model_{}_CAN{}_Eu{}_Rls{}_RWz{}_{}'.format(
        model_num, opt.Creative_weight, opt.CENT_LAMBDA, opt.REG_W_LAMBDA,
        opt.REG_Wz_LAMBDA, opt.exp_name)

    out_subdir = main_dir + 'out/{:s}/{:s}'.format(exp_info, exp_params)
    if not os.path.exists(out_subdir):
        os.makedirs(out_subdir)

    log_dir = out_subdir + '/log_{:s}.txt'.format(exp_info)
    with open(log_dir, 'a') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    if model_num == 2 or model_num == 4 or model_num == 5:
        nets = [netG, netD, log_SM_ab]
    else:
        nets = [netG, netD]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    if model_num == 2 or model_num == 4 or model_num == 5:
        optimizer_SM_ab = optim.Adam(log_SM_ab.parameters(),
                                     lr=opt.lr,
                                     betas=(0.5, 0.999))

    for it in tqdm(range(start_step, 3000 + 1)):
        # Creative Loss
        blobs = data_layer.forward()
        labels = blobs['labels'].astype(int)
        new_class_labels = Variable(
            torch.from_numpy(np.ones_like(labels) *
                             dataset.train_cls_num)).cuda()
        text_feat_1 = np.array(
            [dataset.train_text_feature[i, :] for i in labels])
        text_feat_2 = np.array(
            [dataset.train_text_feature[i, :] for i in labels])
        np.random.shuffle(
            text_feat_1
        )  # Shuffle both features to guarantee different permutations
        np.random.shuffle(text_feat_2)
        alpha = (np.random.random(len(labels)) * (.8 - .2)) + .2

        text_feat_mean = np.multiply(alpha, text_feat_1.transpose())
        text_feat_mean += np.multiply(1. - alpha, text_feat_2.transpose())
        text_feat_mean = text_feat_mean.transpose()
        text_feat_mean = normalize(text_feat_mean, norm='l2', axis=1)
        text_feat_Creative = Variable(
            torch.from_numpy(text_feat_mean.astype('float32'))).cuda()
        z_creative = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()
        G_creative_sample = netG(z_creative, text_feat_Creative)
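        # The block above builds "creative" conditioning features: two shuffled
        # copies of the batch's text features are convex-combined with alpha drawn
        # from [0.2, 0.8), re-normalized to unit L2 norm, and passed with fresh
        # noise z_creative through the generator.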
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels

            text_feat = np.array(
                [dataset.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # GAN's D loss on real samples
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = -D_loss_real + C_loss_real
            DC_loss.backward()

            # GAN's D loss on fake (generated) samples
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)

            DC_loss = D_loss_fake + C_loss_fake
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)
        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array(
                [dataset.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2

            GC_loss = -G_loss + C_loss

            # Centroid loss
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) -
                            tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            # ||W_z||21 regularization, make W_z sparse
            reg_Wz_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_Wz_LAMBDA != 0:
                Wz = netG.rdc_text.weight
                reg_Wz_loss = Wz.pow(2).sum(dim=0).sqrt().sum().mul(
                    opt.REG_Wz_LAMBDA)

            # Class posterior D(C | GX_fake), plus encourage D to score GX_fake as real
            D_creative_fake, C_creative_fake = netD(G_creative_sample)
            if model_num == 1:  # KL Divergence
                G_fake_C = F.log_softmax(C_creative_fake, dim=1)
            else:
                G_fake_C = F.softmax(C_creative_fake, dim=1)
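            # Each divergence branch below turns the classifier posterior over the
            # seen classes for the creative samples into an entropy-style penalty
            # (KL, SM i.e. Sharma-Mittal, Bhattacharyya, Tsallis, or Renyi, per the
            # branch comments); model 6 instead uses a plain cross-entropy against
            # an extra "unseen" class label. The chosen term becomes loss_creative
            # (scaled by opt.Creative_weight for models 1-5).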

            if model_num == 1:  # KL Divergence
                entropy_GX_fake = (G_fake_C / G_fake_C.data.size(1)).mean()
            elif model_num == 2:  # SM Divergence
                q_shape = Variable(
                    torch.FloatTensor(G_fake_C.data.size(0),
                                      G_fake_C.data.size(1))).cuda()
                q_shape.data.fill_(1.0 / G_fake_C.data.size(1))

                SM_ab = F.sigmoid(log_SM_ab(ones))
                SM_a = 0.2 + torch.div(SM_ab[0][0], 1.6666666666666667).cuda()
                SM_b = 0.2 + torch.div(SM_ab[0][1], 1.6666666666666667).cuda()
                pow_a_b = torch.div(1 - SM_a, 1 - SM_b)
                alpha_term = (torch.pow(G_fake_C + 1e-5, SM_a) *
                              torch.pow(q_shape, 1 - SM_a)).sum(1)
                entropy_GX_fake_vec = torch.div(
                    torch.pow(alpha_term, pow_a_b) - 1, SM_b - 1)
            elif model_num == 3:  # Bhattacharyya Divergence
                q_shape = Variable(
                    torch.FloatTensor(G_fake_C.data.size(0),
                                      G_fake_C.data.size(1))).cuda()
                q_shape.data.fill_(1.0 / G_fake_C.data.size(1))
                SM_a = Variable(torch.FloatTensor(1, 1)).cuda()
                SM_a.data.fill_(opt.SM_Alpha)
                SM_b = Variable(torch.FloatTensor(1, 1)).cuda()
                SM_b.data.fill_(opt.SM_Alpha)
                pow_a_b = torch.div(1 - SM_a, 1 - SM_b)
                alpha_term = (torch.pow(G_fake_C + 1e-5, SM_a) *
                              torch.pow(q_shape, 1 - SM_a)).sum(1)
                entropy_GX_fake_vec = -torch.div(
                    torch.pow(alpha_term, pow_a_b) - 1, SM_b - 1)
            elif model_num == 4:  # Tsallis Divergence
                q_shape = Variable(
                    torch.FloatTensor(G_fake_C.data.size(0),
                                      G_fake_C.data.size(1))).cuda()
                q_shape.data.fill_(1.0 / G_fake_C.data.size(1))

                SM_ab = F.sigmoid(log_SM_ab(ones))
                SM_a = 0.2 + torch.div(SM_ab[0][0], 1.6666666666666667).cuda()
                SM_b = SM_a
                pow_a_b = torch.div(1 - SM_a, 1 - SM_b)
                alpha_term = (torch.pow(G_fake_C + 1e-5, SM_a) *
                              torch.pow(q_shape, 1 - SM_a)).sum(1)
                entropy_GX_fake_vec = -torch.div(
                    torch.pow(alpha_term, pow_a_b) - 1, SM_b - 1)
            elif model_num == 5:  # Renyi Divergence
                q_shape = Variable(
                    torch.FloatTensor(G_fake_C.data.size(0),
                                      G_fake_C.data.size(1))).cuda()
                q_shape.data.fill_(1.0 / G_fake_C.data.size(1))

                SM_ab = F.sigmoid(log_SM_ab(ones))
                SM_a = 0.2 + torch.div(SM_ab[0][0], 1.6666666666666667).cuda()
                SM_b = Variable(torch.FloatTensor(1, 1)).cuda()
                SM_b.data.fill_(opt.SM_Beta)
                pow_a_b = torch.div(1 - SM_a, 1 - SM_b)
                alpha_term = (torch.pow(G_fake_C + 1e-5, SM_a) *
                              torch.pow(q_shape, 1 - SM_a)).sum(1)
                entropy_GX_fake_vec = -torch.div(
                    torch.pow(alpha_term, pow_a_b) - 1, SM_b - 1)

            if model_num == 6:
                loss_creative = F.cross_entropy(C_creative_fake,
                                                new_class_labels)
            else:
                if model_num != 1:
                    # Normalize SM-Divergence & Report mean
                    min_e, max_e = torch.min(entropy_GX_fake_vec), torch.max(
                        entropy_GX_fake_vec)
                    entropy_GX_fake_vec = (entropy_GX_fake_vec -
                                           min_e) / (max_e - min_e)
                    entropy_GX_fake = -entropy_GX_fake_vec.mean()
                loss_creative = -opt.Creative_weight * entropy_GX_fake

            disc_GX_fake_real = -torch.mean(D_creative_fake)
            total_loss_creative = loss_creative + disc_GX_fake_real

            all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss + total_loss_creative
            all_loss.backward()
            if model_num == 2 or model_num == 4 or model_num == 5:
                optimizer_SM_ab.step()
            optimizerG.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])

            log_text = 'Iter-{}; rl: {:.4}%; fk: {:.4}%'.format(
                it, acc_real * 100, acc_fake * 100)
            with open(log_dir, 'a') as f:
                f.write(log_text + '\n')

        if it % opt.evl_interval == 0 and it > opt.disp_interval:
            netG.eval()
            cur_acc = eval_fakefeat_test(it, netG, dataset, param, result)
            cur_auc = eval_fakefeat_GZSL(netG, dataset, param, out_subdir,
                                         result)

            if cur_acc > result.best_acc:
                result.best_acc = cur_acc

            if cur_auc > result.best_auc:
                result.best_auc = cur_auc

                if it % opt.save_interval:
                    files2remove = glob.glob(out_subdir + '/Best_model*')
                    for _i in files2remove:
                        os.remove(_i)
                    torch.save(
                        {
                            'it': it + 1,
                            'state_dict_G': netG.state_dict(),
                            'state_dict_D': netD.state_dict(),
                            'random_seed': opt.manualSeed,
                            'log': log_text,
                        }, out_subdir +
                        '/Best_model_AUC_{:.2f}.tar'.format(cur_auc))

            netG.train()
    return result
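The training loop above calls calc_gradient_penalty, which is defined outside
the snippet shown here. A minimal sketch of a standard WGAN-GP gradient penalty
consistent with that usage (assuming netD returns a (critic score, class logits)
pair as above, and a penalty weight of 10) could be:

import torch
from torch.autograd import Variable, grad


def calc_gradient_penalty(netD, real_data, fake_data, lambda_gp=10.0):
    # Interpolate between real and generated feature vectors, then penalize
    # the critic's gradient norm for deviating from 1 (standard WGAN-GP term).
    alpha = torch.rand(real_data.size(0), 1).expand_as(real_data).cuda()
    interpolates = Variable(alpha * real_data + (1.0 - alpha) * fake_data,
                            requires_grad=True)
    disc_out, _ = netD(interpolates)  # assumes a (score, logits) return value
    gradients = grad(outputs=disc_out, inputs=interpolates,
                     grad_outputs=torch.ones(disc_out.size()).cuda(),
                     create_graph=True, retain_graph=True, only_inputs=True)[0]
    return lambda_gp * ((gradients.norm(2, dim=1) - 1.0) ** 2).mean()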
Example #21
0
def train():
    param = _param()
    dataset = DATA_LOADER(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result = Result()
    result_gzsl = Result()

    netG = _netG_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    netG2 = _netG2_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG2.apply(weights_init)
    print(netG2)
    netD2 = _netD2_att(dataset.text_dim, dataset.train_cls_num).cuda()
    netD2.apply(weights_init)
    print(netD2)

    exp_info = 'GBU_{}'.format(opt.dataset)
    exp_params = 'Eu{}_Rls{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA)

    out_dir = 'out/{:s}'.format(exp_info)
    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)
    if not os.path.exists('out'):
        os.mkdir('out')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    if not os.path.exists(out_subdir):
        os.mkdir(out_subdir)

    cprint("The output directory is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}_{}.txt'.format(exp_info, opt.exp_idx)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            netG2.load_state_dict(checkpoint['state_dict_G2'])
            netD2.load_state_dict(checkpoint['state_dict_D2'])

            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))
    nets = [netG, netD, netG2, netD2]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerD2 = optim.Adam(netD2.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG2 = optim.Adam(netG2.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    for it in range(start_step, 10000 + 1):
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            # GAN's D loss on real samples
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = opt.Adv_LAMBDA * (-D_loss_real + C_loss_real)
            DC_loss.backward()

            # GAN's D loss on fake (generated) samples
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)

            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = opt.Adv_LAMBDA * (D_loss_fake + C_loss_fake)
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = opt.Adv_LAMBDA * calc_gradient_penalty(
                netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)
        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2
            GC_loss = opt.Adv_LAMBDA * (-G_loss + C_loss)

            # Centroid loss
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) -
                            tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)
        """D2"""
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()
            z2 = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # G1 results
            visual_sample = netG(z, text_feat)

            # real loss
            D2_real = netD2(text_feat)
            D2_loss_real = torch.mean(D2_real)
            #C2_loss_real = F.cross_entropy(C2_real, y_true)
            DC2_loss = -D2_loss_real  #+  C2_loss_real
            DC2_loss.backward()

            # fake loss
            text_sample = netG2(z2, visual_sample).detach()
            D2_fake = netD2(text_sample)
            D2_loss_fake = torch.mean(D2_fake)
            #C2_loss_fake = F.cross_entropy(C2_fake, y_true)
            DC2_loss = D2_loss_fake  #+ C2_loss_fake
            DC2_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty1(netD2, text_feat.data,
                                                  text_sample.data)
            grad_penalty.backward()
            Wasserstein_D2 = D2_loss_real - D2_loss_fake
            optimizerD2.step()
            reset_grad(nets)
        """G2"""
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            text_sample = netG2(z, X)
            D2_fake = netD2(text_sample)
            #_, C2_real = netD2(text_feat)

            # GAN's G loss
            G2_loss = torch.mean(D2_fake)
            # Auxiliary classification loss
            #C2_loss = (F.cross_entropy(C2_real, y_true) + F.cross_entropy(C2_fake, y_true)) / 2

            GC2_loss = -G2_loss  #+ C2_loss

            # ||W||_2 regularization
            reg_loss2 = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG2.named_parameters():
                    if 'weight' in name:
                        reg_loss2 += p.pow(2).sum()
                reg_loss2.mul_(opt.REG_W_LAMBDA)

            all_loss = GC2_loss + reg_loss2
            all_loss.backward()
            optimizerG2.step()
            reset_grad(nets)
        """Cycle Loss"""
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()
            z2 = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()
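            # Cycle consistency: text features are mapped to visual features by
            # netG and back to text features by netG2; the reconstruction is pulled
            # toward the original text features with a scaled MSE loss, and both
            # generators are updated.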

            G_sample = netG(z, text_feat)
            text_sample = netG2(z2, G_sample)

            cycle_loss = 10 * torch.nn.MSELoss()(text_feat, text_sample)
            cycle_loss.backward()

            optimizerG.step()
            optimizerG2.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])

            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])

            log_text = 'Iter-{}; Was_D: {:.3f}; Was_D2: {:.3f}; Euc_ls: {:.3f}; reg_ls: {:.3f}; reg_ls2: {:.3f}; \n' \
                       'G_loss: {:.3f};G2_loss: {:.3f}; D_loss_real: {:.3f};D2_loss_real: {:.3f}; D_loss_fake: {:.3f};' \
                       'D2_loss_fake: {:.3f}; rl: {:.2f}%; fk: {:.2f}%;cycle: {:.3f} \n'\
                        .format(it, Wasserstein_D.item(), Wasserstein_D2.item(), Euclidean_loss.item(),
                                reg_loss.item(),reg_loss2.item(), G_loss.item(),G2_loss.item(), D_loss_real.item(),
                                D2_loss_real.item(),D_loss_fake.item(),D2_loss_fake.item(),
                                acc_real * 100, acc_fake * 100, cycle_loss.item())
            print(log_text)
            with open(log_dir, 'a') as f:
                f.write(log_text + '\n')

        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()
            eval_fakefeat_test(it, netG, dataset, param, result)
            if result.save_model:
                files2remove = glob.glob(out_subdir + '/Best_model_ZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                # best_acc = result.acc_list[-1]
                save_model(
                    it, netG, netD, netG2, netD2, opt.manualSeed, log_text,
                    out_subdir + '/Best_model_ZSL_Acc_{:.2f}.tar'.format(
                        result.acc_list[-1]))

            eval_fakefeat_test_gzsl(it, netG, dataset, param, result_gzsl)
            if result.save_model:
                files2remove = glob.glob(out_subdir + '/Best_model_GZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                # best_acc_gzsl = result.acc_list[-1]
                save_model(
                    it, netG, netD, netG2, netD2, opt.manualSeed, log_text,
                    out_subdir +
                    '/Best_model_GZSL_H_{:.2f}_S_{:.2f}_U_{:.2f}.tar'.format(
                        result_gzsl.best_acc, result_gzsl.best_acc_S_T,
                        result_gzsl.best_acc_U_T))

            netG.train()

        if it % opt.save_interval == 0 and it:
            save_model(it, netG, netD, netG2, netD2, opt.manualSeed, log_text,
                       out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it),
                   'red')