def __init__(self, opt, nclasses, ndomains, mean, std, source_trainloader,
             source_valloader, targetloader):
    """Create the networks, criteria and optimizers for the trainer.

    Stores the data loaders and the ``mean``/``std`` statistics, builds two
    feature extractors (``netF1``, ``netF2``), three classifiers (``netC1``
    over ``nclasses``; ``netC2`` and ``netC3`` over ``ndomains``), a
    generator and a discriminator, applies ``utils.weights_init`` to each,
    moves everything to the GPU when ``opt.gpu >= 0`` and attaches one Adam
    optimizer per network.
    """
    # Data sources and normalisation statistics
    self.source_trainloader = source_trainloader
    self.source_valloader = source_valloader
    self.targetloader = targetloader
    self.opt = opt
    self.mean = mean
    self.std = std

    # Best scores seen so far
    self.best_val = 0
    self.best_test = 0

    self.nclasses = nclasses
    self.ndomains = ndomains

    # Networks (constructed in a fixed order so random init is reproducible)
    self.netF1 = models._netF(opt)
    self.netF2 = models._netF(opt)
    self.netC1 = models._netC(opt, nclasses)
    self.netC2 = models._netC(opt, ndomains)
    self.netC3 = models._netC(opt, ndomains)
    self.netG = models._netG(opt, (opt.ndf * 2) * 2)
    self.netD = models._netD(opt, nclasses, ndomains)

    networks = (self.netF1, self.netF2, self.netC1, self.netC2,
                self.netC3, self.netG, self.netD)

    # Weight initialization
    for net in networks:
        net.apply(utils.weights_init)

    # Loss criteria: class prediction and real/fake prediction
    self.criterion_c = nn.CrossEntropyLoss()
    self.criterion_s = nn.BCELoss()

    if opt.gpu >= 0:
        for module in networks + (self.criterion_c, self.criterion_s):
            module.cuda()

    def make_adam(net):
        # Every network shares the same Adam hyper-parameters.
        return optim.Adam(net.parameters(), lr=opt.lr,
                          betas=(opt.beta1, 0.999))

    self.optimizerF1 = make_adam(self.netF1)
    self.optimizerF2 = make_adam(self.netF2)
    self.optimizerC1 = make_adam(self.netC1)
    self.optimizerC2 = make_adam(self.netC2)
    self.optimizerC3 = make_adam(self.netC3)
    self.optimizerG = make_adam(self.netG)
    self.optimizerD = make_adam(self.netD)

    # Target values used for "real" and "fake" samples
    self.real_label_val = 1
    self.fake_label_val = 0
def __init__(self, opt, nclasses, mean, std, source_trainloader,
             source_valloader, target_trainloader, target_valloader):
    """Create the G/D/F/C networks, criteria and optimizers.

    Keeps references to the four data loaders and to ``mean``/``std``,
    builds the generator, discriminator, feature extractor and classifier,
    applies ``utils.weights_init`` to each, moves them (and the criteria) to
    the GPU when ``opt.gpu >= 0`` and creates one Adam optimizer per network
    with ``betas=(0.8, 0.999)``.
    """
    # Data sources and normalisation statistics
    self.source_trainloader = source_trainloader
    self.source_valloader = source_valloader
    self.target_trainloader = target_trainloader
    self.target_valloader = target_valloader
    self.opt = opt
    self.mean = mean
    self.std = std
    self.best_val = 0
    self.nclasses = nclasses

    # Networks
    self.netG = models._netG(opt, nclasses)
    self.netD = models._netD(opt, nclasses)
    self.netF = models._netF(opt)
    self.netC = models._netC(opt, nclasses)

    # Weight initialization
    for net in (self.netG, self.netD, self.netF, self.netC):
        net.apply(utils.weights_init)

    # Loss criteria: class prediction and real/fake prediction
    self.criterion_c = nn.CrossEntropyLoss()
    self.criterion_s = nn.BCELoss()

    if opt.gpu >= 0:
        gpu_modules = (self.netD, self.netG, self.netF, self.netC,
                       self.criterion_c, self.criterion_s)
        for module in gpu_modules:
            module.cuda()

    # Optimizers share one set of Adam hyper-parameters
    adam_kwargs = dict(lr=opt.lr, betas=(0.8, 0.999))
    self.optimizerD = optim.Adam(self.netD.parameters(), **adam_kwargs)
    self.optimizerG = optim.Adam(self.netG.parameters(), **adam_kwargs)
    self.optimizerF = optim.Adam(self.netF.parameters(), **adam_kwargs)
    self.optimizerC = optim.Adam(self.netC.parameters(), **adam_kwargs)

    # Target values used for "real" and "fake" samples
    self.real_label_val = 1
    self.fake_label_val = 0
def __init__(self, mean, std, source_trainloader, source_valloader,
             targetloader):
    """Create the G/D/F/C networks, criteria and optimizers.

    Hyper-parameters come from the ``consts`` module rather than an options
    object, and the GPU is used whenever ``torch.cuda.is_available()``
    reports one.
    """
    # Data sources and normalisation statistics
    self.source_trainloader = source_trainloader
    self.source_valloader = source_valloader
    self.targetloader = targetloader
    self.mean = mean
    self.std = std
    self.best_val = 0
    self.cuda = torch.cuda.is_available()

    # Networks
    self.netG = models._netG()
    self.netD = models._netD()
    self.netF = models._netF()
    self.netC = models._netC()

    # Weight initialization
    for net in (self.netG, self.netD, self.netF, self.netC):
        net.apply(utils.weights_init)

    # Loss criteria: class prediction and real/fake prediction
    self.criterion_c = nn.CrossEntropyLoss()
    self.criterion_s = nn.BCELoss()

    if self.cuda:
        gpu_modules = (self.netD, self.netG, self.netF, self.netC,
                       self.criterion_c, self.criterion_s)
        for module in gpu_modules:
            module.cuda()

    def make_adam(net):
        # Every network shares the same Adam hyper-parameters.
        return optim.Adam(net.parameters(), lr=consts.lr,
                          betas=(consts.beta1, 0.999))

    self.optimizerD = make_adam(self.netD)
    self.optimizerG = make_adam(self.netG)
    self.optimizerF = make_adam(self.netF)
    self.optimizerC = make_adam(self.netC)

    # Target values used for "real" and "fake" samples
    self.real_label_val = 1
    self.fake_label_val = 0
def __init__(self, opt, nclasses, mean, std, source_trainloader,
             source_valloader, targetloader):
    """Create the G/D/F/C networks, criteria and optimizers.

    The generator is built with ``nclasses + 1 + opt.ndf * 2`` as its second
    argument and the discriminator with ``(nclasses, 1)``. All networks get
    ``utils.weights_init`` applied, are moved to the GPU when
    ``opt.gpu >= 0`` and receive an Adam optimizer using ``opt.lr`` and
    ``opt.beta1``.
    """
    # Data sources and normalisation statistics
    self.source_trainloader = source_trainloader
    self.source_valloader = source_valloader
    self.targetloader = targetloader
    self.opt = opt
    self.mean = mean
    self.std = std

    # Best scores seen so far
    self.best_val = 0
    self.best_test = 0

    # Networks
    self.nclasses = nclasses
    generator_arg = self.nclasses + 1 + opt.ndf * 2
    self.netG = models._netG(opt, generator_arg)
    self.netD = models._netD(opt, nclasses, 1)
    self.netF = models._netF(opt)
    self.netC = models._netC(opt, nclasses)

    # Weight initialization
    for net in (self.netG, self.netD, self.netF, self.netC):
        net.apply(utils.weights_init)

    # Loss criteria: class prediction and real/fake prediction
    self.criterion_c = nn.CrossEntropyLoss()
    self.criterion_s = nn.BCELoss()

    if opt.gpu >= 0:
        gpu_modules = (self.netD, self.netG, self.netF, self.netC,
                       self.criterion_c, self.criterion_s)
        for module in gpu_modules:
            module.cuda()

    def make_adam(net):
        # Every network shares the same Adam hyper-parameters.
        return optim.Adam(net.parameters(), lr=opt.lr,
                          betas=(opt.beta1, 0.999))

    self.optimizerD = make_adam(self.netD)
    self.optimizerG = make_adam(self.netG)
    self.optimizerF = make_adam(self.netF)
    self.optimizerC = make_adam(self.netC)

    # Target values used for "real" and "fake" samples
    self.real_label_val = 1
    self.fake_label_val = 0
def __init__(self, config, exp_idx, seed=1000):
    """Build the GAN wrapper from a JSON configuration file.

    Args:
        config: Path passed to ``read_json``; the resulting mapping must
            provide ``G_fil_num``, ``D_fil_num``, ``D_pth``, ``G_pth``,
            ``checkpoint_dir``, ``log_name``, ``iqa_name`` and
            ``save_every_epoch``.
        exp_idx: Experiment index, stored on the instance.
        seed: Random seed, stored on the instance (not applied here).

    Side effects: moves both networks to the GPU, optionally restores their
    weights, creates the checkpoint directory, prepares the data loaders and
    starts a MATLAB engine.
    """
    self.seed = seed
    self.exp_idx = exp_idx
    self.iqa_hash = collections.defaultdict(dict)
    self.config = read_json(config)

    self.netG = _netG(self.config["G_fil_num"]).cuda()
    self.netD = _netD(self.config["D_fil_num"]).cuda()
    self.initial_CNN('./cnn_config.json', exp_idx=0)

    # Restore pre-trained weights when paths are configured.
    if self.config["D_pth"]:
        self.netD.load_state_dict(torch.load(self.config["D_pth"]))
    if self.config["G_pth"]:
        # Fixed: the generator checkpoint used to be loaded into netD, which
        # clobbered the discriminator and left the generator untouched.
        self.netG.load_state_dict(torch.load(self.config["G_pth"]))

    # makedirs(exist_ok=True) also creates missing parents and does not race
    # with another process creating the same directory.
    self.checkpoint_dir = self.config["checkpoint_dir"]
    os.makedirs(self.checkpoint_dir, exist_ok=True)

    self.prepare_dataloader()
    self.log_name = self.config["log_name"]
    self.iqa_name = self.config["iqa_name"]
    self.eng = matlab.engine.start_matlab()
    self.save_every_epoch = self.config["save_every_epoch"]
def train():
    """Train the text-conditioned feature generator against a critic.

    Reads the module-level ``opt``. Each of the 3001 iterations performs five
    critic updates (Wasserstein loss + auxiliary classification + gradient
    penalty) followed by one generator update (adversarial + classification
    + class-centroid + weight-decay terms). Every ``opt.evl_interval``
    iterations (from iteration 100 on) the generator is evaluated with
    ``eval_fakefeat_test`` and ``eval_fakefeat_test_gzsl``.
    """
    param = _param()
    dataset = DATA_LOADER(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result = Result()
    result_gzsl = Result()

    netG = _netG_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    start_step = 0
    nets = [netG, netD]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    def next_batch():
        """Fetch one mini-batch plus its text features and fresh noise."""
        blobs = data_layer.forward()
        feat_data = blobs['data']  # image data
        labels = blobs['labels'].astype(int)  # class labels
        text_feat = np.array([dataset.train_sem[i, :] for i in labels])
        text_feat = Variable(
            torch.from_numpy(text_feat.astype('float32'))).cuda()
        X = Variable(torch.from_numpy(feat_data)).cuda()
        y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
        z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()
        return X, y_true, text_feat, z

    for it in range(start_step, 3000 + 1):
        # ---------------- Discriminator ----------------
        for _ in range(5):
            X, y_true, text_feat, z = next_batch()

            # Real samples: critic score and auxiliary classification
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = opt.Adv_LAMBDA * (-D_loss_real + C_loss_real)
            DC_loss.backward()

            # Fake samples, detached so the generator gets no gradient here
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = opt.Adv_LAMBDA * (D_loss_fake + C_loss_fake)
            DC_loss.backward()

            # Gradient penalty (WGAN-GP)
            grad_penalty = opt.Adv_LAMBDA * calc_gradient_penalty(
                netD, X.data, G_sample.data)
            grad_penalty.backward()

            optimizerD.step()
            reset_grad(nets)

        # ---------------- Generator ----------------
        for _ in range(1):
            X, y_true, text_feat, z = next_batch()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # Adversarial term
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2
            GC_loss = opt.Adv_LAMBDA * (-G_loss + C_loss)

            # Centroid loss: pull the mean generated feature of each class
            # towards that class's real centroid.
            # Fixed: this term used to be gated on REG_W_LAMBDA (the weight
            # of the *other* regulariser) instead of its own CENT_LAMBDA.
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.CENT_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero()
                    if sample_idx.numel() == 0:
                        continue
                    # Fixed: flatten instead of squeeze() so a class with a
                    # single sample still yields a 2-D slice; otherwise
                    # mean(dim=0) would average over features, not samples.
                    sample_idx = sample_idx.view(-1)
                    G_sample_cls = G_sample[sample_idx, :]
                    Euclidean_loss += (
                        G_sample_cls.mean(dim=0) -
                        tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            log_text = 'Iter-{} *********************'.format(it)
            print(log_text)

        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()
            eval_fakefeat_test(it, netG, dataset, param, result)
            eval_fakefeat_test_gzsl(it, netG, dataset, param, result_gzsl)
            netG.train()
def train(opt):
    """Train one generator/critic pair per part feature.

    Builds 7 pairs when ``opt.dataset == "CUB2011"`` and 6 otherwise; pair
    ``p`` works on columns ``[p * 512, (p + 1) * 512)`` of the feature
    matrix. Each of the 3001 iterations runs five critic updates and one
    generator update for every part. Every ``opt.evl_interval`` iterations
    (after iteration 500) the generators are switched to eval mode and
    ``train_classifier`` is called.

    Args:
        opt: Options object; reads ``dataset``, ``lr``, ``batchsize``,
            ``GP_LAMBDA``, ``CENT_LAMBDA``, ``REG_W_LAMBDA``,
            ``REG_Wz_LAMBDA`` and ``evl_interval``.
    """
    part_dim = 512

    param = _param()
    dataset = LoadDataset(opt)
    param.X_dim = dataset.feature_dim
    data_layer = FeatDataLayer(dataset.labels_train,
                               dataset.pfc_feat_data_train, opt)

    # initialize model
    netGs = []
    netDs = []
    parts = 7 if opt.dataset == "CUB2011" else 6
    for part in range(parts):
        netGs.append(
            _netG(dataset.text_dim, part_dim).cuda().apply(weights_init))
        netDs.append(
            _netD(dataset.train_cls_num, part_dim).cuda().apply(weights_init))

    start_step = 0
    part_cls_centrild = torch.from_numpy(
        dataset.part_cls_centrild.astype('float32')).cuda()

    # initialize optimizers
    optimizerGs = [
        optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))
        for netG in netGs
    ]
    optimizerDs = [
        optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
        for netD in netDs
    ]

    def next_batch():
        """Fetch one mini-batch and its text features as CUDA tensors."""
        blobs = data_layer.forward()
        feat_data = blobs['data']  # image data
        labels = blobs['labels'].astype(int)  # class labels
        text_feat = np.array(
            [dataset.train_text_feature[i, :] for i in labels])
        text_feat = torch.from_numpy(text_feat.astype('float32')).cuda()
        X = torch.from_numpy(feat_data).cuda()
        y_true = torch.from_numpy(labels.astype('int')).cuda()
        return X, y_true, text_feat

    for it in range(start_step, 3000 + 1):
        # ---------------- Discriminator ----------------
        for _ in range(5):
            X, y_true, text_feat = next_batch()
            for part in range(parts):
                lo, hi = part * part_dim, (part + 1) * part_dim
                z = torch.randn(opt.batchsize, param.z_dim).cuda()

                D_real, C_real = netDs[part](X[:, lo:hi])
                D_loss_real = torch.mean(D_real)
                C_loss_real = F.cross_entropy(C_real, y_true)
                DC_loss = -D_loss_real + C_loss_real
                DC_loss.backward()

                # Detached: the critic step must not back-propagate into the
                # generator (its gradients were discarded afterwards anyway).
                G_sample = netGs[part](z, text_feat).detach()
                D_fake, C_fake = netDs[part](G_sample)
                D_loss_fake = torch.mean(D_fake)
                C_loss_fake = F.cross_entropy(C_fake, y_true)
                DC_loss = D_loss_fake + C_loss_fake
                DC_loss.backward()

                grad_penalty = calc_gradient_penalty(
                    opt.batchsize, netDs[part], X.data[:, lo:hi],
                    G_sample.data, opt.GP_LAMBDA)
                grad_penalty.backward()

                optimizerDs[part].step()
                netGs[part].zero_grad()
                netDs[part].zero_grad()

        # ---------------- Generator ----------------
        for _ in range(1):
            X, y_true, text_feat = next_batch()
            for part in range(parts):
                lo, hi = part * part_dim, (part + 1) * part_dim
                z = torch.randn(opt.batchsize, param.z_dim).cuda()

                G_sample = netGs[part](z, text_feat)
                D_fake, C_fake = netDs[part](G_sample)
                _, C_real = netDs[part](X[:, lo:hi])

                G_loss = torch.mean(D_fake)
                C_loss = (F.cross_entropy(C_real, y_true) +
                          F.cross_entropy(C_fake, y_true)) / 2
                GC_loss = -G_loss + C_loss

                # Centroid loss.
                # Fixed: gate on CENT_LAMBDA (this term's own weight), not on
                # REG_W_LAMBDA.
                Euclidean_loss = torch.tensor([0.0]).cuda()
                if opt.CENT_LAMBDA != 0:
                    for i in range(dataset.train_cls_num):
                        sample_idx = (y_true == i).data.nonzero()
                        if sample_idx.numel() == 0:
                            continue
                        # Fixed: flatten instead of squeeze() so that a class
                        # with one sample keeps a 2-D slice and mean(dim=0)
                        # averages over samples, not over features.
                        sample_idx = sample_idx.view(-1)
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) -
                            part_cls_centrild[i][part]).pow(2).sum().sqrt()
                    Euclidean_loss *= (1.0 / dataset.train_cls_num *
                                       opt.CENT_LAMBDA)

                # ||W||_2 regularization
                reg_loss = torch.Tensor([0.0]).cuda()
                if opt.REG_W_LAMBDA != 0:
                    for name, p in netGs[part].named_parameters():
                        if 'weight' in name:
                            reg_loss += p.pow(2).sum()
                    reg_loss.mul_(opt.REG_W_LAMBDA)

                # ||W_z||21 regularization, make W_z sparse
                reg_Wz_loss = torch.Tensor([0.0]).cuda()
                if opt.REG_Wz_LAMBDA != 0:
                    Wz = netGs[part].rdc_text.weight
                    reg_Wz_loss = reg_Wz_loss + Wz.pow(2).sum(
                        dim=0).sqrt().sum().mul(opt.REG_Wz_LAMBDA)

                all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss
                all_loss.backward()
                optimizerGs[part].step()

                # Fixed: all_loss.backward() also fills the critic's .grad
                # buffers. Without clearing them here, the first critic step
                # of the next iteration was taken on stale generator-loss
                # gradients.
                netGs[part].zero_grad()
                netDs[part].zero_grad()

        if it % opt.evl_interval == 0 and it > 500:
            print(it)
            for part in range(parts):
                netGs[part].eval()
            train_classifier(opt, param, dataset, netGs)
            for part in range(parts):
                netGs[part].train()
def train(im_data, gen_model, method, name_file, niter, n_critic, lr,
          input_noise):
    """Train a GAN or WGAN image generator and log statistics to disk.

    Args:
        im_data: Dataset handed to ``torch.utils.data.DataLoader``; each
            batch is used directly as an image tensor.
        gen_model: ``'nearest'``, ``'bilinear'`` or ``'transposed_conv2d'``;
            selects the generator class.
        method: ``'GAN'`` (Adam, log-loss) or ``'WGAN'`` (RMSprop, mean
            critic score, weight clamping to ``[-0.05, 0.05]``).
        name_file: Prefix for saved model and statistics files.
        niter: Number of epochs.
        n_critic: Discriminator updates per generator update.
        lr: Learning rate for both optimizers.
        input_noise: ``'yes'`` adds Gaussian noise (sigma 0.1) to the real
            batch and rescales by the maximum absolute value.

    Raises:
        ValueError: If ``gen_model`` or ``method`` is not a supported value.

    Writes sample images to ``savedata/figures``, state dicts to
    ``savedata/models`` and a pickle of ``[hyperparameters, stats]`` to
    ``savedata/data`` after every epoch.

    NOTE(review): the source of this function was flattened; the nesting of
    the critic loop versus the generator update below is a reconstruction
    (generator update once per batch, after ``n_critic`` critic steps) -
    confirm against the original file.
    """
    # Fail fast instead of hitting an unbound netG/netD/optimizer later.
    if gen_model not in ('nearest', 'bilinear', 'transposed_conv2d'):
        raise ValueError('unknown gen_model: {!r}'.format(gen_model))
    if method not in ('GAN', 'WGAN'):
        raise ValueError('unknown method: {!r}'.format(method))

    nz = 100
    img_size = 64
    batch_size = 64
    beta1 = 0.5
    sigma = 0.1  # std of the optional input noise

    hyperparameters = {}
    hyperparameters['nz'] = nz
    hyperparameters['n_critic'] = n_critic
    hyperparameters['img_size'] = img_size
    hyperparameters['batch_size'] = batch_size
    hyperparameters['lr'] = lr
    hyperparameters['beta1'] = beta1

    dataloader = torch.utils.data.DataLoader(im_data, batch_size,
                                             shuffle=True)

    # Pre-allocated buffers that are resized/refilled for every batch.
    input_buf = torch.FloatTensor(batch_size, 3, img_size, img_size)
    noise = torch.FloatTensor(batch_size, nz, 1, 1)
    fixed_noise = Variable(
        torch.FloatTensor(batch_size, nz, 1, 1).normal_(0, 1))
    label = torch.FloatTensor(batch_size)

    if gen_model == 'nearest':
        netG = _netG_nearest()
    elif gen_model == 'bilinear':
        netG = _netG_bilinear()
    else:  # 'transposed_conv2d'
        netG = _netG()
    netG.apply(weights_init)

    netD = _netD(method)
    netD.apply(weights_init)

    # NOTE(review): the criterion and the label tensors are kept for parity
    # with the original code but the losses below are computed by hand.
    criterion = nn.BCEWithLogitsLoss()

    if method == 'GAN':
        optimizerD = optim.Adam(netD.parameters(), lr, betas=(beta1, 0.9))
        optimizerG = optim.Adam(netG.parameters(), lr, betas=(beta1, 0.9))
    else:  # 'WGAN'
        optimizerD = optim.RMSprop(netD.parameters(), lr=lr)
        optimizerG = optim.RMSprop(netG.parameters(), lr=lr)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        netD.cuda()
        netG.cuda()
        criterion.cuda()
        input_buf, label = input_buf.cuda(), label.cuda()
        noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

    # Per-epoch and per-batch statistics.
    epochl = []
    wdl = []
    errDml = []
    errGml = []
    errDsl = []
    errGsl = []
    errorGl = []
    errorDl = []
    dxl = []
    dgz1l = []
    dgz2l = []
    grad_netDl = []
    grad_netGl = []

    def dump_stats(epoch_idx):
        """Pickle the hyper-parameters and all statistics collected so far."""
        dd = {}
        if input_noise == 'yes':
            dd['sigma'] = sigma
        dd['wd'] = wdl
        dd['gen_model'] = gen_model
        dd['epoch'] = epochl
        dd['errDm'] = errDml
        dd['errGm'] = errGml
        dd['errDs'] = errDsl
        dd['errGs'] = errGsl
        dd['error_d'] = errorDl
        dd['error_g'] = errorGl
        dd['dx'] = dxl
        dd['dgz1'] = dgz1l
        dd['dgz2'] = dgz2l
        dd['grad_netD'] = grad_netDl
        dd['grad_netG'] = grad_netGl
        filename = 'savedata/data/{}_epoch_{}.pkl'.format(name_file,
                                                          epoch_idx)
        with open(filename, 'wb') as f:
            pickle.dump([hyperparameters, dd], f)
        return dd

    for epoch in range(niter):
        errDm = []
        errGm = []
        wdm = []
        for i, data in enumerate(dataloader):
            for j in range(n_critic):
                #############################################################
                # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
                #############################################################
                netD.zero_grad()

                # Optionally perturb the real batch with Gaussian noise and
                # rescale it by its maximum absolute value.
                if input_noise == 'yes':
                    gaussian_noise = data.new(data.size()).normal_(0, sigma)
                    normalize = torch.max(torch.abs(data + gaussian_noise))
                    real_cpu = (data + gaussian_noise) / normalize
                else:
                    real_cpu = data
                batch_size = real_cpu.size(0)
                if use_cuda:
                    real_cpu = real_cpu.cuda()

                # train with real
                input_buf.resize_as_(real_cpu).copy_(real_cpu)
                real_label = gan_label(1, 'D')
                label.resize_(batch_size).fill_(real_label)
                inputv = Variable(input_buf)
                labelv = Variable(label)
                output = netD(inputv)
                if method == 'GAN':
                    errD_real = torch.log(output)
                else:
                    errD_real = torch.mean(output)
                D_x = output.data.mean()

                # train with fake
                noise.resize_(batch_size, nz, 1, 1).normal_(0, 1)
                noisev = Variable(noise)
                fake = netG(noisev)
                fake_label = gan_label(0, 'D')
                labelv = Variable(label.fill_(fake_label))
                output = netD(fake.detach())
                if method == 'GAN':
                    errD_fake = torch.log(1 - output)
                else:
                    errD_fake = torch.mean(output)
                D_G_z1 = output.data.mean()

                grad_penalty = calc_gradient_penalty(netD, inputv, fake)
                if method == 'GAN':
                    errD = -torch.mean(errD_real + errD_fake) + grad_penalty
                else:
                    errD = -(torch.mean(errD_real) -
                             torch.mean(errD_fake)) + grad_penalty
                errD.backward()
                optimizerD.step()

                if method == 'WGAN':
                    # Weight clipping keeps the critic roughly Lipschitz.
                    for p in netD.parameters():
                        p.data.clamp_(-0.05, 0.05)

            wd = torch.mean(errD_real - errD_fake)
            wdm.append(wd.data[0])
            errDm.append(errD.data[0])

            #############################################
            # (2) Update G network: maximize log(D(G(z)))
            #############################################
            netG.zero_grad()
            real_label = gan_label(1, 'G')
            # fake labels are real for generator cost
            labelv = Variable(label.fill_(real_label))
            output = netD(fake)
            if method == 'GAN':
                errG = -torch.mean(torch.log(output))
            else:
                errG = -torch.mean(output)
            errG.backward()
            errGm.append(errG.data[0])
            D_G_z2 = output.data.mean()
            optimizerG.step()

            grad_netD = grad_norm(netD)
            grad_netG = grad_norm(netG)

            # Fixed: the last field reports the generator's gradient norm
            # but was labelled "Grad_D".
            print(
                '[{}/{}][{}/{}] Loss_D: {:8f} Loss_G: {:4f} D(x): {:4f} D(G(z)): {:4f} / {:4f} Grad_D: {:2f} Grad_G: {:2f}'
                .format(epoch + 1, niter, i + 1, len(dataloader),
                        errD.data[0], errG.data[0], D_x, D_G_z1, D_G_z2,
                        grad_netD, grad_netG))

            errorDl.append(errD.data[0])
            errorGl.append(errG.data[0])
            dxl.append(D_x)
            dgz1l.append(D_G_z1)
            dgz2l.append(D_G_z2)
            epochl.append(epoch)
            grad_netDl.append(grad_netD)
            grad_netGl.append(grad_netG)

        # End of epoch: sample from the fixed noise and checkpoint.
        fake = netG(fixed_noise)
        vutils.save_image(
            fake.data,
            'savedata/figures/{}_{}_fake_samples_epoch_{}_i_{}.png'.format(
                gen_model, method, epoch, i),
            normalize=True)

        wdl.append(np.mean(np.array(wdm)))
        errDml.append(np.mean(np.array(errDm)))
        errGml.append(np.mean(np.array(errGm)))
        errDsl.append(np.std(np.array(errDm)))
        errGsl.append(np.std(np.array(errGm)))

        torch.save(
            netG.state_dict(),
            'savedata/models/{}_{}_netG_epoch_{}.pth'.format(
                name_file, method, epoch))
        torch.save(
            netD.state_dict(),
            'savedata/models/{}_{}_netD_epoch_{}.pth'.format(
                name_file, method, epoch))

        dump_stats(epoch)

    # Final dump (same file as the last epoch); skipped when no epoch ran,
    # where the original would have failed on the unbound ``epoch``.
    if niter > 0:
        dump_stats(niter - 1)
    print()
    print(wdl)
def train():
    """Train six part-wise generator/critic pairs and log the start time.

    Reads the module-level ``opt``. Pair ``p`` works on columns
    ``[p * 512, (p + 1) * 512)`` of the feature matrix. The output directory
    ``out/<CUB_EASY|CUB_HARD>/Eu.._Rls.._RWz..`` is created and a start
    timestamp is appended to its log file. Each of the 3001 iterations runs
    five critic updates and one generator update per part; every
    ``opt.evl_interval`` iterations (from iteration 1000 on) the generators
    are put in eval mode and ``train_classifier`` is called.
    """
    part_dim = 512

    param = _param()
    dataset = LoadDataset(opt)
    param.X_dim = dataset.feature_dim
    data_layer = FeatDataLayer(dataset.labels_train,
                               dataset.pfc_feat_data_train, opt)

    # initialize model
    netGs = []
    netDs = []
    parts = 6
    for part in range(parts):
        netGs.append(
            _netG(dataset.text_dim, part_dim).cuda().apply(weights_init))
        netDs.append(
            _netD(dataset.train_cls_num, part_dim).cuda().apply(weights_init))

    # Output directory and log file
    exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
    exp_params = 'Eu{}_Rls{}_RWz{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA,
                                           opt.REG_Wz_LAMBDA)
    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)
    # Creates 'out', 'out/<exp_info>' and the leaf directory in one call.
    os.makedirs(out_subdir, exist_ok=True)

    cprint(" The output dictionary is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}.txt'.format(exp_info)
    with open(log_dir, 'a') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    part_cls_centrild = torch.from_numpy(
        dataset.part_cls_centrild.astype('float32')).cuda()

    # initialize optimizers
    optimizerGs = [
        optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))
        for netG in netGs
    ]
    optimizerDs = [
        optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
        for netD in netDs
    ]

    def next_batch():
        """Fetch one mini-batch and its text features as CUDA tensors."""
        blobs = data_layer.forward()
        feat_data = blobs['data']  # image data
        labels = blobs['labels'].astype(int)  # class labels
        text_feat = np.array(
            [dataset.train_text_feature[i, :] for i in labels])
        text_feat = torch.from_numpy(text_feat.astype('float32')).cuda()
        X = torch.from_numpy(feat_data).cuda()
        y_true = torch.from_numpy(labels.astype('int')).cuda()
        return X, y_true, text_feat

    for it in range(start_step, 3000 + 1):
        # ---------------- Discriminator ----------------
        for _ in range(5):
            X, y_true, text_feat = next_batch()
            # (A noise tensor that was drawn here and immediately overwritten
            # inside the part loop has been removed; this shifts the random
            # stream relative to earlier runs.)
            for part in range(parts):
                lo, hi = part * part_dim, (part + 1) * part_dim
                z = torch.randn(opt.batchsize, param.z_dim).cuda()

                D_real, C_real = netDs[part](X[:, lo:hi])
                D_loss_real = torch.mean(D_real)
                C_loss_real = F.cross_entropy(C_real, y_true)
                DC_loss = -D_loss_real + C_loss_real
                DC_loss.backward()

                # Detached: the critic step must not back-propagate into the
                # generator (its gradients were discarded afterwards anyway).
                G_sample = netGs[part](z, text_feat).detach()
                D_fake, C_fake = netDs[part](G_sample)
                D_loss_fake = torch.mean(D_fake)
                C_loss_fake = F.cross_entropy(C_fake, y_true)
                DC_loss = D_loss_fake + C_loss_fake
                DC_loss.backward()

                grad_penalty = calc_gradient_penalty(
                    netDs[part], X.data[:, lo:hi], G_sample.data)
                grad_penalty.backward()

                optimizerDs[part].step()
                netGs[part].zero_grad()
                netDs[part].zero_grad()

        # ---------------- Generator ----------------
        for _ in range(1):
            X, y_true, text_feat = next_batch()
            for part in range(parts):
                lo, hi = part * part_dim, (part + 1) * part_dim
                z = torch.randn(opt.batchsize, param.z_dim).cuda()

                G_sample = netGs[part](z, text_feat)
                D_fake, C_fake = netDs[part](G_sample)
                _, C_real = netDs[part](X[:, lo:hi])

                G_loss = torch.mean(D_fake)
                C_loss = (F.cross_entropy(C_real, y_true) +
                          F.cross_entropy(C_fake, y_true)) / 2
                GC_loss = -G_loss + C_loss

                # Centroid loss.
                # Fixed: gate on CENT_LAMBDA (this term's own weight), not on
                # REG_W_LAMBDA.
                Euclidean_loss = torch.tensor([0.0]).cuda()
                if opt.CENT_LAMBDA != 0:
                    for i in range(dataset.train_cls_num):
                        sample_idx = (y_true == i).data.nonzero()
                        if sample_idx.numel() == 0:
                            continue
                        # Fixed: flatten instead of squeeze() so that a class
                        # with one sample keeps a 2-D slice and mean(dim=0)
                        # averages over samples, not over features.
                        sample_idx = sample_idx.view(-1)
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) -
                            part_cls_centrild[i][part]).pow(2).sum().sqrt()
                    Euclidean_loss *= (1.0 / dataset.train_cls_num *
                                       opt.CENT_LAMBDA)

                # ||W||_2 regularization
                reg_loss = torch.Tensor([0.0]).cuda()
                if opt.REG_W_LAMBDA != 0:
                    for name, p in netGs[part].named_parameters():
                        if 'weight' in name:
                            reg_loss += p.pow(2).sum()
                    reg_loss.mul_(opt.REG_W_LAMBDA)

                # ||W_z||21 regularization, make W_z sparse
                reg_Wz_loss = torch.Tensor([0.0]).cuda()
                if opt.REG_Wz_LAMBDA != 0:
                    Wz = netGs[part].rdc_text.weight
                    reg_Wz_loss = reg_Wz_loss + Wz.pow(2).sum(
                        dim=0).sqrt().sum().mul(opt.REG_Wz_LAMBDA)

                all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss
                all_loss.backward()
                optimizerGs[part].step()

                # Fixed: all_loss.backward() also fills the critic's .grad
                # buffers. Without clearing them here, the first critic step
                # of the next iteration was taken on stale generator-loss
                # gradients.
                netGs[part].zero_grad()
                netDs[part].zero_grad()

        if it % opt.evl_interval == 0 and it >= 1000:
            print(it)
            for part in range(parts):
                netGs[part].eval()
            train_classifier(opt, param, dataset, netGs)
            for part in range(parts):
                netGs[part].train()
def train():
    """Train the text-conditioned generator ``netG`` and critic/classifier ``netD``.

    Everything is configured through the module-level ``opt`` object.  The
    function builds the dataset, both networks (on CUDA) and their Adam
    optimizers, optionally resumes from ``opt.resume``, and then runs
    iterations ``start_step .. 5000`` (inclusive).  Each iteration performs:

    1. for ``it > opt.mode_change``: a call to
       ``dataset.semantic_similarity_check`` with text features produced by
       the current generator;
    2. five critic updates (Wasserstein loss + auxiliary classification loss
       + gradient penalty);
    3. one generator update (adversarial + classification + centroid +
       correlation + weight regularisation terms);
    4. for ``it > opt.unseen_start``: one ``netD`` update and one ``netG``
       update on features generated from the test-class text features;
    5. periodic logging, evaluation and checkpointing into
       ``out_<epsilon>/<exp_info>/<exp_params>``.

    Returns:
        None.  Results are written to the log file and to ``.tar`` checkpoints.
    """
    start_time = time.time()
    param = _param()
    dataset = LoadDataset(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.labels_train, dataset.pfc_feat_data_train,
                               dataset.seen_label_mapping, opt)
    result = Result()
    result_gzsl = Result()

    netG = _netG(dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num + dataset.test_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
    exp_params = 'Eu{}_Rls{}_RWz{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA, opt.REG_Wz_LAMBDA)
    out_subdir = 'out_' + str(opt.epsilon) + '/{:s}/{:s}'.format(exp_info, exp_params)
    # Creates 'out_<epsilon>', the experiment directory and the parameter
    # sub-directory in one call; already-existing directories are fine.
    os.makedirs(out_subdir, exist_ok=True)

    cprint(" The output dictionary is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}.txt'.format(exp_info)
    with open(log_dir, 'a') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')
        f.write("Running Parameter Logs")
        f.write(runing_parameters_logs)

    start_step = 0
    # log_text is read by the evaluation guard and by both checkpoint dicts,
    # which can run before the first display step; bind it up front so those
    # reads never hit an unbound local.
    log_text = None

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
            log_text = checkpoint['log']
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    nets = [netG, netD]

    tr_cls_centroid = Variable(torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    for it in range(start_step, 5000 + 1):
        if it > opt.mode_change:
            # Hand the generator's current text embeddings for all train and
            # test classes to the dataset's similarity check.
            train_text = Variable(torch.from_numpy(dataset.train_text_feature.astype('float32'))).cuda()
            test_text = Variable(torch.from_numpy(dataset.test_text_feature.astype('float32'))).cuda()
            z_train = Variable(torch.randn(dataset.train_cls_num, param.z_dim)).cuda()
            z_test = Variable(torch.randn(dataset.test_cls_num, param.z_dim)).cuda()
            _, train_text_feature = netG(z_train, train_text)
            _, test_text_feature = netG(z_test, test_text)
            dataset.semantic_similarity_check(opt.Knn,
                                              train_text_feature.data.cpu().numpy(),
                                              test_text_feature.data.cpu().numpy())

        # ---- Discriminator ----
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            true_labels = blobs['true_labels'].astype(int)

            text_feat = np.array([dataset.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(true_labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # GAN's D loss on real features
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = -D_loss_real + C_loss_real
            DC_loss.backward()

            # GAN's D loss on generated features
            G_sample, _ = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = D_loss_fake + C_loss_fake
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)

        # ---- Generator ----
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            true_labels = blobs['true_labels'].astype(int)  # True seen label class

            text_feat = np.array([dataset.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(true_labels.astype('int'))).cuda()
            y_dummy = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            G_sample, _ = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) + F.cross_entropy(C_fake, y_true)) / 2
            GC_loss = -G_loss + C_loss

            # Centroid loss
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            Correlation_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.CENT_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_dummy == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        # A single matching sample is already one row, so
                        # there is nothing to average over.
                        if sample_idx.numel() != 1:
                            generated_mean = G_sample_cls.mean(dim=0)
                        else:
                            generated_mean = G_sample_cls

                        Euclidean_loss += (generated_mean - tr_cls_centroid[i]).pow(2).sum().sqrt()

                        # NOTE(review): the similarity is computed on
                        # .data.cpu().numpy() copies, so autograd does not
                        # track it and Correlation_loss contributes no
                        # gradient to netG -- confirm this is intended.
                        for n in range(dataset.Neighbours):
                            Neighbor_correlation = cosine_similarity(
                                generated_mean.data.cpu().numpy().reshape((1, dataset.feature_dim)),
                                tr_cls_centroid[dataset.idx_mat[i, n]].data.cpu().numpy().reshape(
                                    (1, dataset.feature_dim)))

                            lower_limit = dataset.semantic_similarity_seen[i, n] - opt.epsilon
                            upper_limit = dataset.semantic_similarity_seen[i, n] + opt.epsilon
                            lower_limit = torch.as_tensor(lower_limit.astype('float'))
                            upper_limit = torch.as_tensor(upper_limit.astype('float'))
                            corr = torch.as_tensor(Neighbor_correlation[0][0].astype('float'))

                            # corr - corr is a zero tensor, so each torch.max
                            # keeps only the amount by which corr leaves the
                            # [lower_limit, upper_limit] band.
                            margin = (torch.max(corr - corr, corr - upper_limit)) ** 2 \
                                + (torch.max(corr - corr, lower_limit - corr)) ** 2
                            Correlation_loss += margin

                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA
                Correlation_loss = Correlation_loss * opt.correlation_penalty

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            # ||W_z||21 regularization, make W_z sparse
            reg_Wz_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_Wz_LAMBDA != 0:
                Wz = netG.rdc_text.weight
                reg_Wz_loss = Wz.pow(2).sum(dim=0).sqrt().sum().mul(opt.REG_Wz_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss + Correlation_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)

        if it > opt.unseen_start:
            for _ in range(1):  # Zero shot Discriminator is training
                zero_shot_labels = np.random.randint(dataset.test_cls_num,
                                                     size=opt.zeroshotbatchsize).astype(int)
                zero_shot_true_labels = np.array(
                    [dataset.unseen_label_mapping[i] for i in zero_shot_labels])
                zero_text_feat = np.array([dataset.test_text_feature[i, :] for i in zero_shot_labels])
                zero_text_feat = Variable(torch.from_numpy(zero_text_feat.astype('float32'))).cuda()
                zero_y_true = Variable(torch.from_numpy(zero_shot_true_labels.astype('int'))).cuda()
                z = Variable(torch.randn(opt.zeroshotbatchsize, param.z_dim)).cuda()

                # Only the classification head is trained on these samples.
                G_sample_zero, _ = netG(z, zero_text_feat)
                _, C_fake_zero = netD(G_sample_zero)
                C_loss_fake_zero = F.cross_entropy(C_fake_zero, zero_y_true)
                C_loss_fake_zero.backward()

                optimizerD.step()
                reset_grad(nets)

            for _ in range(1):  # Zero shot Generator is training
                zero_shot_labels = np.random.randint(dataset.test_cls_num,
                                                     size=opt.zeroshotbatchsize).astype(int)
                zero_shot_true_labels = np.array(
                    [dataset.unseen_label_mapping[i] for i in zero_shot_labels])
                zero_text_feat = np.array([dataset.test_text_feature[i, :] for i in zero_shot_labels])
                zero_text_feat = Variable(torch.from_numpy(zero_text_feat.astype('float32'))).cuda()
                zero_y_true = Variable(torch.from_numpy(zero_shot_true_labels.astype('int'))).cuda()
                y_dummy_zero = Variable(torch.from_numpy(zero_shot_labels.astype('int'))).cuda()
                z = Variable(torch.randn(opt.zeroshotbatchsize, param.z_dim)).cuda()

                G_sample_zero, _ = netG(z, zero_text_feat)
                _, C_fake_zero = netD(G_sample_zero)
                C_loss_fake_zero = F.cross_entropy(C_fake_zero, zero_y_true)

                Correlation_loss_zero = Variable(torch.Tensor([0.0])).cuda()
                if opt.CENT_LAMBDA != 0:
                    for i in range(dataset.test_cls_num):
                        sample_idx = (y_dummy_zero == i).data.nonzero().squeeze()
                        if sample_idx.numel() != 0:
                            G_sample_cls = G_sample_zero[sample_idx, :]
                            if sample_idx.numel() != 1:
                                generated_mean = G_sample_cls.mean(dim=0)
                            else:
                                generated_mean = G_sample_cls

                            # Same band penalty as in the seen-class step,
                            # using the unseen-class neighbour tables.
                            for n in range(dataset.Neighbours):
                                Neighbor_correlation = cosine_similarity(
                                    generated_mean.data.cpu().numpy().reshape((1, dataset.feature_dim)),
                                    tr_cls_centroid[dataset.unseen_idx_mat[i, n]].data.cpu().numpy().reshape(
                                        (1, dataset.feature_dim)))

                                lower_limit = dataset.semantic_similarity_unseen[i, n] - opt.epsilon
                                upper_limit = dataset.semantic_similarity_unseen[i, n] + opt.epsilon
                                lower_limit = torch.as_tensor(lower_limit.astype('float'))
                                upper_limit = torch.as_tensor(upper_limit.astype('float'))
                                corr = torch.as_tensor(Neighbor_correlation[0][0].astype('float'))

                                margin = (torch.max(corr - corr, corr - upper_limit)) ** 2 \
                                    + (torch.max(corr - corr, lower_limit - corr)) ** 2
                                Correlation_loss_zero += margin

                    Correlation_loss_zero = Correlation_loss_zero * opt.correlation_penalty

                # ||W||_2 regularization
                reg_loss_zero = Variable(torch.Tensor([0.0])).cuda()
                if opt.REG_W_LAMBDA != 0:
                    for name, p in netG.named_parameters():
                        if 'weight' in name:
                            reg_loss_zero += p.pow(2).sum()
                    reg_loss_zero.mul_(opt.REG_W_LAMBDA)

                # ||W_z||21 regularization, make W_z sparse
                reg_Wz_loss_zero = Variable(torch.Tensor([0.0])).cuda()
                if opt.REG_Wz_LAMBDA != 0:
                    Wz = netG.rdc_text.weight
                    reg_Wz_loss_zero = Wz.pow(2).sum(dim=0).sqrt().sum().mul(opt.REG_Wz_LAMBDA)

                all_loss = C_loss_fake_zero + reg_loss_zero + reg_Wz_loss_zero + Correlation_loss_zero
                all_loss.backward()
                optimizerG.step()
                reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            # Accuracies are measured on the batch of the last generator step.
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])

            log_text = 'Iter-{}; Was_D: {:.4}; Euc_ls: {:.4};reg_ls: {:.4}; Wz_ls: {:.4}; G_loss: {:.4}; Correlation_loss : {:.4} ; D_loss_real: {:.4};' \
                       ' D_loss_fake: {:.4}; rl: {:.4}%; fk: {:.4}%'.format(
                           it, Wasserstein_D.item(), Euclidean_loss.item(), reg_loss.item(),
                           reg_Wz_loss.item(), G_loss.item(), Correlation_loss.item(),
                           D_loss_real.item(), D_loss_fake.item(), acc_real * 100, acc_fake * 100)

            log_text1 = ""
            if it > opt.unseen_start:
                acc_fake_zero = (np.argmax(C_fake_zero.data.cpu().numpy(), axis=1)
                                 == zero_y_true.data.cpu().numpy()).sum() / float(zero_y_true.data.size()[0])
                log_text1 = 'Zero_Shot_Iter-{}; Correlation_loss : {:.4}; fk: {:.4}%'.format(
                    it, Correlation_loss_zero.item(), acc_fake_zero * 100)

            # NOTE (Maunil): .item() is used above instead of .data[0].
            print(log_text)
            print(log_text1)
            with open(log_dir, 'a') as f:
                f.write(log_text + '\n')
                f.write(log_text1 + '\n')

        if it % opt.evl_interval == 0 and it >= 80 and log_text is not None:
            # eval() disables batch norm updates and dropout during testing.
            netG.eval()
            eval_fakefeat_test(it, netG, netD, dataset, param, result)
            eval_fakefeat_GZSL(it, netG, dataset, param, result_gzsl)
            if result.save_model:
                # Keep a single best checkpoint on disk.
                files2remove = glob.glob(out_subdir + '/Best_model*')
                for _i in files2remove:
                    os.remove(_i)
                torch.save({
                    'it': it + 1,
                    'state_dict_G': netG.state_dict(),
                    'state_dict_D': netD.state_dict(),
                    'random_seed': opt.manualSeed,
                    'log': log_text,
                    'Zero Shot Acc': result.acc_list[-1],
                    'Generalized Zero Shot Acc': result_gzsl.acc_list[-1]
                }, out_subdir + '/Best_model_Acc_' + str(result.acc_list[-1])
                    + '_AUC_' + str(result_gzsl.acc_list[-1]) + '_' + '.tar')
            netG.train()

        if it % opt.save_interval == 0 and it:
            # NOTE(review): acc_list[-1] presumably requires at least one
            # evaluation to have run before the first save -- confirm against
            # Result and the configured intervals.
            torch.save({
                'it': it + 1,
                'state_dict_G': netG.state_dict(),
                'state_dict_D': netD.state_dict(),
                'random_seed': opt.manualSeed,
                'log': log_text,
                'Zero Shot Acc': result.acc_list[-1],
                'Generalized Zero Shot Acc': result_gzsl.acc_list[-1]
            }, out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it), 'red')

    print("########################################################")
    print("--- %s Time took seconds ---" % (time.time() - start_time))
    print("########################################################")
def train(model_num=3, is_val=True, sim_func_number=None, creative_weight=None):
    """Train the feature generator with optional creativity / text-regressor terms.

    Args:
        model_num: Selects which auxiliary components are trained.
            1 -- generator and discriminator only;
            2 -- adds the learned ``log_SM_ab`` scale and the divergence-based
                 creative loss;
            3 -- adds the text regressor ``netT``;
            4 -- both of the above.
        is_val: When true, models are selected on the validation split during
            training and the best checkpoints are re-evaluated on the test
            split afterwards.  Otherwise the test split is used during
            training and no final re-evaluation happens.
        sim_func_number: 1 selects ``F.cosine_similarity`` and 2 selects
            ``F.mse_loss`` as the similarity used with ``netT``.  Required
            when ``model_num`` is 3 or 4.
        creative_weight: Multiplier of the entropy term in the creative loss.
            Required when ``model_num`` is 2 or 4.

    Returns:
        The ``Result`` instance updated by the evaluation helpers.

    Raises:
        ValueError: If ``model_num`` is not 1-4, or an argument required by
            the selected model is missing.  These combinations previously
            failed later, in the middle of training, with an unbound-variable
            or ``'NoneType'`` error.
    """
    if model_num not in (1, 2, 3, 4):
        raise ValueError('model_num must be 1, 2, 3 or 4, got {!r}'.format(model_num))
    uses_sm = model_num in (2, 4)  # learned divergence parameters + creative loss
    uses_t = model_num in (3, 4)   # text regressor netT

    similarity_func = None
    if sim_func_number == 1:
        similarity_func = F.cosine_similarity
    elif sim_func_number == 2:
        similarity_func = F.mse_loss
    if uses_t and similarity_func is None:
        raise ValueError(
            'model_num {} needs sim_func_number 1 or 2, got {!r}'.format(model_num, sim_func_number))
    if uses_sm and creative_weight is None:
        raise ValueError('model_num {} needs a creative_weight'.format(model_num))

    param = _param(opt.z_dim)
    best_model_acc_path = best_model_auc_path = best_model_hm_path = ''

    if opt.dataset == 'CUB':
        dataset = LoadDataset(opt, main_dir, is_val)
        exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
        opt.is_gbu = False
    elif opt.dataset == 'NAB':
        dataset = LoadDataset_NAB(opt, main_dir, is_val)
        exp_info = 'NAB_EASY' if opt.splitmode == 'easy' else 'NAB_HARD'
        opt.is_gbu = False
    elif "GBU" in opt.dataset:
        # The part after the first underscore replaces opt.dataset.
        opt.dataset = opt.dataset.split('_')[1]
        opt.is_gbu = True
        exp_info = opt.dataset
        dataset = LoadDataset_GBU(opt, main_dir, is_val)
    else:
        print('No Dataset with that name')
        sys.exit(0)

    param.X_dim = dataset.feature_dim
    data_layer = FeatDataLayer(np.array(dataset.train_label), np.array(dataset.train_feature), opt)
    result = Result()

    # Constant 1x1 input fed to log_SM_ab to read out its learned parameters.
    ones = Variable(torch.Tensor(1, 1))
    ones.data.fill_(1.0)

    if opt.is_gbu:
        netG = _netG_att(param, dataset.text_dim, dataset.feature_dim).cuda()
    else:
        netG = _netG(dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)

    if uses_sm:
        log_SM_ab = Scale(2)
        log_SM_ab = nn.DataParallel(log_SM_ab).cuda()
    if uses_t:
        netT = _netT(dataset.train_cls_num, dataset.feature_dim, dataset.text_dim).cuda()
        netT.apply(weights_init)

    exp_params = 'Model_{}_is_val_{}_sim_func_number_{}_creative_weight_{}_lr_{}_zdim_{}_{}'.format(
        model_num, is_val, sim_func_number, creative_weight, opt.lr, param.z_dim, opt.exp_name)

    out_subdir = main_dir + 'out/{:s}/{:s}'.format(exp_info, exp_params)
    if not os.path.exists(out_subdir):
        os.makedirs(out_subdir)

    log_dir = out_subdir + '/log_{:s}.txt'.format(exp_info)
    log_dir_2 = out_subdir + '/log_{:s}_iterations.txt'.format(exp_info)
    with open(log_dir, 'a') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    # Stored in every checkpoint; bound here because a resumed run can reach
    # the evaluation step before the first display step assigns it.
    log_text = ''

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            if uses_t:
                netT.load_state_dict(checkpoint['state_dict_T'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
            log_text = checkpoint['log']
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    if model_num == 1:
        nets = [netG, netD]
    elif model_num == 2:
        nets = [netG, netD, log_SM_ab]
    elif model_num == 3:
        nets = [netG, netD, netT]
    else:  # model_num == 4, guaranteed by the validation above
        nets = [netG, netD, netT, log_SM_ab]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    if uses_sm:
        optimizer_SM_ab = optim.Adam(log_SM_ab.parameters(), lr=opt.lr, betas=(0.5, 0.999))
    if uses_t:
        optimizerT = optim.Adam(netT.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    def snapshot(step, log):
        """Build the checkpoint dict for the current network weights."""
        state = {
            'it': step + 1,
            'state_dict_G': netG.state_dict(),
            'state_dict_D': netD.state_dict(),
            'random_seed': opt.manualSeed,
            'log': log,
        }
        if uses_t:
            state.update({'state_dict_T': netT.state_dict()})
        return state

    def restore_best(rel_path):
        """Load out_subdir + rel_path into the networks; return False if it is not a file."""
        full_path = out_subdir + rel_path
        if not os.path.isfile(full_path):
            print("=> no checkpoint found at '{}'".format(full_path))
            return False
        print("=> loading checkpoint '{}'".format(rel_path))
        checkpoint = torch.load(full_path)
        netG.load_state_dict(checkpoint['state_dict_G'])
        netD.load_state_dict(checkpoint['state_dict_D'])
        if uses_t:
            netT.load_state_dict(checkpoint['state_dict_T'])
        print("iteration: {}".format(checkpoint['it']))
        netG.eval()
        return True

    # Split-specific arguments shared by all evaluation helpers.
    test_args = (dataset.test_cls_num, dataset.test_att,
                 dataset.test_unseen_feature, dataset.test_unseen_label)
    if is_val:
        eval_args = (dataset.val_cls_num, dataset.val_att,
                     dataset.val_unseen_feature, dataset.val_unseen_label)
    else:
        eval_args = test_args

    for it in tqdm(range(start_step, 5000 + 1)):
        # Build "creative" text features: a random convex mix (weight in
        # [0.2, 0.8)) of two independently shuffled copies of the batch's
        # class attributes, L2-normalised per row.
        blobs = data_layer.forward()
        labels = blobs['labels'].astype(int)
        text_feat_1 = np.array([dataset.train_att[i, :] for i in labels])
        text_feat_2 = np.array([dataset.train_att[i, :] for i in labels])
        np.random.shuffle(text_feat_1)  # Shuffle both features to guarantee different permutations
        np.random.shuffle(text_feat_2)
        alpha = (np.random.random(len(labels)) * (.8 - .2)) + .2

        text_feat_mean = np.multiply(alpha, text_feat_1.transpose())
        text_feat_mean += np.multiply(1. - alpha, text_feat_2.transpose())
        text_feat_mean = text_feat_mean.transpose()
        text_feat_mean = normalize(text_feat_mean, norm='l2', axis=1)
        text_feat_Creative = Variable(
            torch.from_numpy(text_feat_mean.astype('float32'))).cuda()

        if uses_t:
            # ---- Text Feat Generator ----
            for _ in range(5):
                blobs = data_layer.forward()
                feat_data = blobs['data']  # image data
                labels = blobs['labels'].astype(int)  # class labels
                text_feat = np.array([dataset.train_att[i, :] for i in labels])
                text_feat_TG = Variable(
                    torch.from_numpy(text_feat.astype('float32'))).cuda()
                X = Variable(torch.from_numpy(feat_data)).cuda()
                z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

                # T loss on real features
                T_real = netT(X)
                T_loss_real = torch.mean(similarity_func(text_feat_TG, T_real))

                # T loss on generated features (generator output detached)
                G_sample = netG(z, text_feat_TG).detach()
                T_fake_TG = netT(G_sample)
                T_loss_fake = torch.mean(similarity_func(text_feat_TG, T_fake_TG))

                # T loss on creative generated features
                G_sample_creative = netG(z, text_feat_Creative).detach()
                T_fake_creative_TG = netT(G_sample_creative)
                T_loss_fake_creative = torch.mean(
                    similarity_func(text_feat_Creative, T_fake_creative_TG))

                T_loss = -1 * T_loss_real - T_loss_fake - T_loss_fake_creative
                T_loss.backward()

                optimizerT.step()
                # NOTE(review): both generator outputs above are detached, so
                # T_loss.backward() gives netG no gradient here -- confirm
                # this optimizerG.step() is intended.
                optimizerG.step()
                reset_grad(nets)

        # ---- Discriminator ----
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # GAN's D loss on real features
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = -D_loss_real + C_loss_real
            DC_loss.backward()

            # GAN's D loss on generated features
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = D_loss_fake + C_loss_fake
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)

        # ---- Generator ----
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) + F.cross_entropy(C_fake, y_true)) / 2

            # GAN's G loss creative
            # NOTE(review): the creative sample is detached, so the creative
            # terms below send no gradient into netG -- confirm intended.
            G_sample_creative = netG(z, text_feat_Creative).detach()

            if uses_t:
                D_creative_fake, _ = netD(G_sample_creative)
                G_loss_fake_creative = torch.mean(D_creative_fake)

                T_fake = netT(G_sample)
                T_loss_fake = torch.mean(similarity_func(text_feat, T_fake))

                T_fake_creative = netT(G_sample_creative)
                T_loss_fake_creative = torch.mean(
                    similarity_func(text_feat_Creative, T_fake_creative))

                GC_loss = -G_loss - G_loss_fake_creative + C_loss - T_loss_fake - T_loss_fake_creative
            else:
                GC_loss = -G_loss + C_loss

            # Centroid loss
            # NOTE(review): gated on REG_W_LAMBDA although it is scaled by
            # CENT_LAMBDA; looks like it should test CENT_LAMBDA -- confirm.
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) - tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            # ||W_z||21 regularization, make W_z sparse
            reg_Wz_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_Wz_LAMBDA != 0 and not opt.is_gbu:
                Wz = netG.rdc_text.weight
                reg_Wz_loss = Wz.pow(2).sum(dim=0).sqrt().sum().mul(opt.REG_Wz_LAMBDA)

            if uses_sm:
                # D(C| GX_fake)) + Classify GX_fake as real
                D_creative_fake, C_creative_fake = netD(G_sample_creative)
                # The logits are 2-D (indexed with size(0)/size(1) below), so
                # dim=1 is what the implicit-dim call resolved to.
                G_fake_C = F.softmax(C_creative_fake, dim=1)

                # SM Divergence against a uniform distribution over classes
                q_shape = Variable(
                    torch.FloatTensor(G_fake_C.data.size(0), G_fake_C.data.size(1))).cuda()
                q_shape.data.fill_(1.0 / G_fake_C.data.size(1))

                # Squash the learned parameters into (0.2, 0.8).
                SM_ab = torch.sigmoid(log_SM_ab(ones))
                SM_a = 0.2 + torch.div(SM_ab[0][0], 1.6666666666666667).cuda()
                SM_b = 0.2 + torch.div(SM_ab[0][1], 1.6666666666666667).cuda()
                pow_a_b = torch.div(1 - SM_a, 1 - SM_b)
                alpha_term = (torch.pow(G_fake_C + 1e-5, SM_a) * torch.pow(q_shape, 1 - SM_a)).sum(1)
                entropy_GX_fake_vec = torch.div(torch.pow(alpha_term, pow_a_b) - 1, SM_b - 1)

                # Min-max normalise across the batch before averaging.
                min_e, max_e = torch.min(entropy_GX_fake_vec), torch.max(entropy_GX_fake_vec)
                entropy_GX_fake_vec = (entropy_GX_fake_vec - min_e) / (max_e - min_e)
                entropy_GX_fake = -entropy_GX_fake_vec.mean()
                loss_creative = -creative_weight * entropy_GX_fake

                disc_GX_fake_real = -torch.mean(D_creative_fake)
                total_loss_creative = loss_creative + disc_GX_fake_real

                all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss + total_loss_creative
            else:
                all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss

            all_loss.backward()
            if uses_sm:
                optimizer_SM_ab.step()
            optimizerG.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            # Accuracies are measured on the batch of the last generator step.
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])

            log_text = 'Iter-{}; rl: {:.4}%; fk: {:.4}%'.format(
                it, acc_real * 100, acc_fake * 100)
            with open(log_dir, 'a') as f:
                f.write(log_text + '\n')

        if it % opt.evl_interval == 0 and it > opt.disp_interval:
            cur_acc = 0
            cur_auc = 0
            cur_hm = 0

            netG.eval()
            cur_acc = eval_fakefeat_test(netG, *eval_args, param, result)
            if opt.is_gbu:
                cur_hm, acc_S_T, acc_U_T = eval_fakefeat_test_gzsl(
                    netG, dataset, *eval_args, param, result)
            else:
                cur_auc = eval_fakefeat_GZSL(netG, dataset, *eval_args, param, out_subdir, result)

            # For each metric keep exactly one "best" checkpoint on disk.
            if cur_acc > result.best_acc:
                result.best_acc = cur_acc
                for _i in glob.glob(out_subdir + '/Best_model_ACC*'):
                    os.remove(_i)
                best_model_acc_path = '/Best_model_ACC_{:.2f}.tar'.format(cur_acc)
                torch.save(snapshot(it, log_text), out_subdir + best_model_acc_path)

            if cur_auc > result.best_auc:
                result.best_auc = cur_auc
                for _i in glob.glob(out_subdir + '/Best_model_AUC*'):
                    os.remove(_i)
                best_model_auc_path = '/Best_model_AUC_{:.2f}.tar'.format(cur_auc)
                torch.save(snapshot(it, log_text), out_subdir + best_model_auc_path)

            if cur_hm > result.best_hm:
                result.best_hm = cur_hm
                result.best_acc_S_T = acc_S_T
                result.best_acc_U_T = acc_U_T
                for _i in glob.glob(out_subdir + '/Best_model_HM*'):
                    os.remove(_i)
                best_model_hm_path = '/Best_model_HM_{:.2f}.tar'.format(cur_hm)
                torch.save(snapshot(it, log_text), out_subdir + best_model_hm_path)

            log_text_2 = 'iteration: %f, best_acc: %f, best_auc: %f, best_hm: %f' % (
                it, result.best_acc, result.best_auc, result.best_hm)
            with open(log_dir_2, 'a') as f:
                f.write(log_text_2 + '\n')

            netG.train()

    if is_val:
        # Re-evaluate each best-on-validation checkpoint on the test split.
        # A path that was never set stays '' and is reported as missing.
        if restore_best(best_model_acc_path):
            test_acc = eval_fakefeat_test(netG, *test_args, param, result)
            result.test_acc = test_acc

        if restore_best(best_model_auc_path):
            test_auc = eval_fakefeat_GZSL(netG, dataset, *test_args, param, out_subdir, result)
            result.test_auc = test_auc

        if restore_best(best_model_hm_path):
            test_hm, test_acc_S_T, test_acc_U_T = eval_fakefeat_test_gzsl(
                netG, dataset, *test_args, param, result)
            result.test_hm = test_hm
            result.test_acc_S_T = test_acc_S_T
            result.test_acc_U_T = test_acc_U_T

        log_text_2 = 'test_acc: %f, test_auc: %f, test_hm: %f, test_acc_S_T: %f, test_acc_U_T: %f' % (
            result.test_acc, result.test_auc, result.test_hm,
            result.test_acc_S_T, result.test_acc_U_T)
        with open(log_dir_2, 'a') as f:
            f.write(log_text_2 + '\n')

    return result
def __init__(self, opt, nclasses, mean, std, source_trainloader, source_valloader,
             targetloader, class_balance, augment):
    """Store the loaders and options, then build networks, losses and optimizers.

    Creates the networks ``netG``, ``netD``, ``netF`` and ``netC`` from
    ``models``, applies ``utils.weights_init`` to each, creates the
    cross-entropy and binary cross-entropy criteria, and gives every network
    its own Adam optimizer with ``lr=opt.lr`` and ``betas=(opt.beta1, 0.999)``.
    When ``opt.gpu >= 0`` the networks, criteria and the uniform class
    distribution are moved to CUDA.
    """
    # Data sources and run configuration
    self.source_trainloader = source_trainloader
    self.source_valloader = source_valloader
    self.targetloader = targetloader
    self.opt = opt
    self.mean, self.std = mean, std
    self.best_val = 0
    self.nclasses = nclasses

    # Networks: built and initialised in the order G, D, F, C
    self.netG = models._netG(opt, nclasses)
    self.netD = models._netD(opt, nclasses)
    self.netF = models._netF(opt)
    self.netC = models._netC(opt, nclasses)
    for network in (self.netG, self.netD, self.netF, self.netC):
        network.apply(utils.weights_init)

    # Loss criteria
    self.criterion_c = nn.CrossEntropyLoss()
    self.criterion_s = nn.BCELoss()

    on_gpu = opt.gpu >= 0
    if on_gpu:
        for module in (self.netD, self.netG, self.netF, self.netC,
                       self.criterion_c, self.criterion_s):
            module.cuda()

    # One Adam optimizer per network, all sharing the same hyper-parameters
    adam_settings = dict(lr=opt.lr, betas=(opt.beta1, 0.999))
    self.optimizerD = optim.Adam(self.netD.parameters(), **adam_settings)
    self.optimizerG = optim.Adam(self.netG.parameters(), **adam_settings)
    self.optimizerF = optim.Adam(self.netF.parameters(), **adam_settings)
    self.optimizerC = optim.Adam(self.netC.parameters(), **adam_settings)

    # Other variables
    self.real_label_val = 1
    self.fake_label_val = 0
    self.augment = augment
    self.class_balance = class_balance
    self.cls_bal_fn = robust_binary_crossentropy

    # Uniform distribution over the classes
    uniform = torch.ones(self.nclasses) * float(1.0 / self.nclasses)
    self.uniform_cls_distribution = uniform.cuda() if on_gpu else uniform
def train():
    """Train netG/netD and periodically log, evaluate and checkpoint them.

    All configuration is read from the module-level ``opt``. Each outer
    iteration runs five discriminator updates followed by one generator
    update. Output goes under ``out/<exp_info>/<exp_params>``:

    * a text log, appended every ``opt.disp_interval`` iterations;
    * a ``Best_model_Acc_*.tar`` checkpoint whenever ``result.save_model`` is
      set after an evaluation;
    * an ``Iter_<it>.tar`` checkpoint every ``opt.save_interval`` iterations.

    If ``opt.resume`` points at an existing file, both networks and the
    starting iteration are restored from it.
    """
    param = _param()
    dataset = LoadDataset(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.labels_train, dataset.pfc_feat_data_train, opt)
    result = Result()

    netG = _netG(dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
    exp_params = 'Eu{}_Rls{}_RWz{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA, opt.REG_Wz_LAMBDA)
    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)

    # One makedirs call creates 'out', 'out/<exp_info>' and the leaf directory.
    if not os.path.exists(out_subdir):
        os.makedirs(out_subdir)

    cprint(" The output dictionary is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}.txt'.format(exp_info)
    with open(log_dir, 'a') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    nets = [netG, netD]

    tr_cls_centroid = Variable(torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    # The checkpoint writers below store the latest log line. Bind it up front
    # so a checkpoint written before the first display step cannot hit an
    # unbound name.
    log_text = ''

    for it in range(start_step, 3000 + 1):
        # ---- Discriminator: five updates per outer iteration ----
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # D loss on real features
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = -D_loss_real + C_loss_real
            DC_loss.backward()

            # D loss on generated features (detached: netG is not updated here)
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = D_loss_fake + C_loss_fake
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)

        # ---- Generator: one update per outer iteration ----
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) + F.cross_entropy(C_fake, y_true)) / 2
            GC_loss = -G_loss + C_loss

            # Centroid loss: distance between the mean generated feature of
            # each class in the batch and that class's training centroid.
            # Gated on CENT_LAMBDA (its own weight); the previous REG_W_LAMBDA
            # gate silently disabled it whenever weight regularisation was off.
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.CENT_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (G_sample_cls.mean(dim=0) - tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            # ||W_z||21 regularization, make W_z sparse
            reg_Wz_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_Wz_LAMBDA != 0:
                Wz = netG.rdc_text.weight
                reg_Wz_loss = Wz.pow(2).sum(dim=0).sqrt().sum().mul(opt.REG_Wz_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)

        # ---- Periodic logging (uses tensors from the last generator step) ----
        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1) == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1) == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])

            log_text = 'Iter-{}; Was_D: {:.4}; Euc_ls: {:.4}; reg_ls: {:.4}; Wz_ls: {:.4}; G_loss: {:.4}; D_loss_real: {:.4};' \
                       ' D_loss_fake: {:.4}; rl: {:.4}%; fk: {:.4}%'\
                .format(it, Wasserstein_D.data[0], Euclidean_loss.data[0], reg_loss.data[0], reg_Wz_loss.data[0],
                        G_loss.data[0], D_loss_real.data[0], D_loss_fake.data[0], acc_real * 100, acc_fake * 100)
            print(log_text)
            with open(log_dir, 'a') as f:
                f.write(log_text + '\n')

        # ---- Periodic evaluation; keep only the most recent best checkpoint ----
        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()
            eval_fakefeat_test(it, netG, dataset, param, result)
            if result.save_model:
                files2remove = glob.glob(out_subdir + '/Best_model*')
                for _i in files2remove:
                    os.remove(_i)
                torch.save({
                    'it': it + 1,
                    'state_dict_G': netG.state_dict(),
                    'state_dict_D': netD.state_dict(),
                    'random_seed': opt.manualSeed,
                    'log': log_text,
                }, out_subdir + '/Best_model_Acc_{:.2f}.tar'.format(result.acc_list[-1]))
            netG.train()

        # ---- Periodic snapshot ----
        if it % opt.save_interval == 0 and it:
            torch.save({
                'it': it + 1,
                'state_dict_G': netG.state_dict(),
                'state_dict_D': netD.state_dict(),
                'random_seed': opt.manualSeed,
                'log': log_text,
            }, out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it), 'red')
def __init__(self, opt, nclasses, mean, std, source_trainloader,
             source_valloader, target_trainloader, target_valloader, res_dir):
    """Build the four networks, dump them to stdout/log, and set up training.

    Args:
        opt: option object; ``opt.flattens``, ``opt.gpu``, ``opt.lr`` and
            ``opt.beta1`` are read here.
        nclasses: number of classes, forwarded to the model constructors.
        mean, std: accepted but not used by this constructor.
        source_trainloader, source_valloader, target_trainloader,
            target_valloader: stored unchanged on the instance.
        res_dir: directory that receives ``app.log``.
    """
    # Data loaders and bookkeeping.
    self.source_trainloader = source_trainloader
    self.source_valloader = source_valloader
    self.target_trainloader = target_trainloader
    self.target_valloader = target_valloader
    self.opt = opt
    self.best_val = 0

    # Networks (built and initialised in G, D, F, C order).
    self.nclasses = nclasses
    self.netG = models._netG(opt, nclasses, flattens=opt.flattens)
    self.netD = models._netD(opt, nclasses)
    self.netF = models._netF(opt)
    self.netC = models._netC(opt, nclasses, flattens=opt.flattens)
    for network in (self.netG, self.netD, self.netF, self.netC):
        network.apply(utils.weights_init)

    logging.basicConfig(filename='{}/app.log'.format(res_dir),
                        level=logging.DEBUG,
                        format='%(asctime)s:%(levelname)s:%(message)s')

    # Print every architecture between "<<" / ">>" markers and mirror it to
    # the debug log. The last closing marker carries no trailing newline.
    dump_plan = (
        ('netG<<', self.netG, '>>\n'),
        ('netD<<', self.netD, '>>\n'),
        ('netF<<', self.netF, '>>\n'),
        ('netC<<', self.netC, '>>'),
    )
    for opening, network, closing in dump_plan:
        print(opening)
        print(network)
        logging.debug(network)
        print(closing)

    # Loss criterions.
    self.criterion_c = nn.CrossEntropyLoss()
    self.criterion_s = nn.BCELoss()
    self.mmd_loss = MMD_loss()
    self.mse_loss = nn.MSELoss()

    # Only the networks and the two classification/BCE criterions are moved.
    if opt.gpu >= 0:
        for module in (self.netD, self.netG, self.netF, self.netC,
                       self.criterion_c, self.criterion_s):
            module.cuda()

    # One Adam optimizer per network, identical hyper-parameters.
    def build_adam(network):
        return optim.Adam(network.parameters(), lr=opt.lr,
                          betas=(opt.beta1, 0.999))

    self.optimizerD = build_adam(self.netD)
    self.optimizerG = build_adam(self.netG)
    self.optimizerF = build_adam(self.netF)
    self.optimizerC = build_adam(self.netC)

    # Other variables.
    self.real_label_val = 1
    self.fake_label_val = 0
def weights_init(m):
    """Re-initialise ``m`` in place, dispatching on its class name.

    Intended to be passed to ``Module.apply``. Modules whose class name
    contains ``'Conv'`` get weights drawn from N(0.0, 0.02); modules whose
    class name contains ``'BatchNorm'`` get weights drawn from N(1.0, 0.02)
    and a zero bias. Everything else is left untouched.
    """
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# Build netG, initialise it, then optionally overwrite the weights from the
# file named by opt.netG.
netG = _netG(ngpu, nz, ngf, nc)
netG.apply(weights_init)
if opt.netG != '':
    netG.load_state_dict(torch.load(opt.netG))
print(netG)

# Same procedure for netD, using opt.netD.
netD = _netD(ngpu, nc, ndf)
netD.apply(weights_init)
if opt.netD != '':
    netD.load_state_dict(torch.load(opt.netD))
print(netD)

criterion = nn.BCELoss()

# Pre-allocated buffers. Only fixed_noise is filled here (standard normal);
# the others are uninitialised FloatTensors.
input = torch.FloatTensor(opt.batchSize, 3, opt.imageSize, opt.imageSize)
noise = torch.FloatTensor(opt.batchSize, nz, 1, 1)
fixed_noise = torch.FloatTensor(opt.batchSize, nz, 1, 1).normal_(0, 1)
label = torch.FloatTensor(opt.batchSize)
real_label = 1
fake_label = 0

# NOTE(review): the body of this branch is not present in this part of the file.
if opt.cuda:
def train():
    """Train netG/netD with bird/family/genus centroid losses.

    Configuration comes from the module-level ``opt``; several output paths
    (``out_subdir``, ``log_dir``, plot and loss-history prefixes) are written
    back onto ``opt``. Discriminator batches come from the "origin" data
    layer, generator batches from the data layer that also supplies
    ``text_feat`` and ``minibatch_origin_label``.

    Each outer iteration runs five discriminator updates and one generator
    update. Every ``opt.evl_interval`` iterations (from iteration 100) both
    evaluations are run and best-accuracy / best-AUC checkpoints refreshed.
    At the end the per-iteration D and G loss histories are saved as text.
    """
    param = _param()
    print("load dataset origin")
    dataset_origin = LoadDataset_origin(opt)
    print("load dataset")
    dataset = LoadDataset(opt)
    param.X_dim = dataset.feature_dim

    data_layer_origin = FeatDataLayer_origin(
        dataset_origin.labels_train, dataset_origin.pfc_feat_data_train, opt)
    data_layer = FeatDataLayer_add_FG(
        dataset.labels_train, dataset.pfc_feat_data_train, opt,
        dataset.train_text_feature, dataset.familyToText, dataset.genusToText,
        dataset.familyLabelToBirdLabel, dataset.genusLabelToBirdLabel,
        dataset.labels_origin_train)
    result = Result()
    result_gzsl = Result()

    netG = _netG(dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
    exp_params = 'Eu{}_Rls{}_RWz{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA,
                                           opt.REG_Wz_LAMBDA)
    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)
    opt.out_subdir = out_subdir

    # One makedirs call creates 'out', 'out/<exp_info>' and the leaf directory.
    if not os.path.exists(out_subdir):
        os.makedirs(out_subdir)

    cprint(" The output dictionary is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}'.format(exp_info)
    if opt.exp_no != "":
        log_dir += "_" + opt.exp_no
    log_dir += ".txt"
    opt.log_dir = log_dir
    opt.auc_plot_dir = out_subdir + '/best_auc_plot{:s}_{:s}'.format(
        opt.exp_no, exp_info)
    opt.auc_solid_plot_dir = out_subdir + '/solid_auc_plot{:s}_{:s}'.format(
        opt.exp_no, exp_info)
    opt.history_D_loss_dir = out_subdir + '/D_loss_plot{:s}_{:s}'.format(
        opt.exp_no, exp_info)
    opt.history_G_loss_dir = out_subdir + '/G_loss_plot{:s}_{:s}'.format(
        opt.exp_no, exp_info)

    start_step = 0
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    nets = [netG, netD]

    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    history_D_loss = []
    history_G_loss = []

    # The checkpoint writers below store the latest log line. Bind it up front
    # so a checkpoint written before the first display step cannot hit an
    # unbound name.
    log_text = ''

    def class_centroid_distance(generated, membership, class_id):
        """Distance between the batch mean of one class and its centroid.

        ``view(-1)`` keeps the index 1-D even when exactly one sample matches.
        ``squeeze()`` would make it 0-dim, and the mean over dim 0 would then
        average over features instead of over samples. Returns ``None`` when
        the class is absent from the batch.
        """
        sample_idx = (membership == class_id).data.nonzero().view(-1)
        if sample_idx.numel() == 0:
            return None
        center = Variable(
            torch.from_numpy(dataset.tr_cls_centroid[class_id].astype(
                'float32'))).cuda()
        return (generated[sample_idx, :].mean(dim=0) -
                center).pow(2).sum().sqrt()

    for it in range(start_step, 10000 + 1):
        cur_D_loss = 0
        cur_G_loss = 0

        # ---- Discriminator: five updates per outer iteration ----
        for _ in range(5):
            blobs = data_layer_origin.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array(
                [dataset_origin.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()
            y_true = y_true.to(device=device, dtype=torch.long)

            # D loss on real features
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = -D_loss_real + C_loss_real
            DC_loss.backward()
            cur_D_loss += DC_loss.item()

            # D loss on generated features (detached: netG is not updated here)
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = D_loss_fake + C_loss_fake
            DC_loss.backward()
            cur_D_loss += DC_loss.item()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)
            cur_D_loss += Wasserstein_D.item()

        # ---- Generator: one update per outer iteration ----
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)
            origin_labels = blobs['minibatch_origin_label'].astype(int)
            text_feat = blobs['text_feat']  # supplied by the data layer
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            y_origin_true = Variable(
                torch.from_numpy(origin_labels.astype('int'))).cuda()
            y_true = y_true.to(device=device, dtype=torch.long)
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2
            GC_loss = -G_loss + C_loss
            cur_G_loss += -G_loss.item() + F.cross_entropy(C_fake, y_true).item()

            # Centroid losses over three label ranges, each averaged over the
            # number of labels in its range.
            Bird_Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.CENT_LAMBDA != 0 and opt.BIRD_CENT_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    dist = class_centroid_distance(G_sample, y_origin_true, i)
                    if dist is not None:
                        Bird_Euclidean_loss += dist
                Bird_Euclidean_loss *= 1.0 / dataset.train_cls_num

            Family_Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.CENT_LAMBDA != 0 and opt.FAMILY_CENT_LAMBDA != 0:
                for i in range(dataset.familyLabelStart, dataset.familyLabelEnd):
                    dist = class_centroid_distance(G_sample, y_origin_true, i)
                    if dist is not None:
                        Family_Euclidean_loss += dist
                Family_Euclidean_loss *= 1.0 / (dataset.familyLabelEnd -
                                                dataset.familyLabelStart)

            Genus_Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.CENT_LAMBDA != 0 and opt.GENUS_CENT_LAMBDA != 0:
                for i in range(dataset.genusLabelStart, dataset.genusLabelEnd):
                    dist = class_centroid_distance(G_sample, y_origin_true, i)
                    if dist is not None:
                        Genus_Euclidean_loss += dist
                Genus_Euclidean_loss *= 1.0 / (dataset.genusLabelEnd -
                                               dataset.genusLabelStart)

            Euclidean_loss = opt.CENT_LAMBDA * (
                opt.BIRD_CENT_LAMBDA * Bird_Euclidean_loss +
                opt.FAMILY_CENT_LAMBDA * Family_Euclidean_loss +
                opt.GENUS_CENT_LAMBDA * Genus_Euclidean_loss)

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            # ||W_z||21 regularization, make W_z sparse
            reg_Wz_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_Wz_LAMBDA != 0:
                Wz = netG.rdc_text.weight
                reg_Wz_loss = Wz.pow(2).sum(dim=0).sqrt().sum().mul(
                    opt.REG_Wz_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)
            cur_G_loss += Euclidean_loss.item()

        history_D_loss.append(cur_D_loss)
        history_G_loss.append(cur_G_loss)
        print("Iter-" + str(it + 1) + "; G-loss: " + str(cur_G_loss) +
              "; D-loss: " + str(cur_D_loss))

        # ---- Periodic logging (uses tensors from the last generator step) ----
        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])

            log_text = 'Iter-{}; Was_D: {:.4}; Euc_ls: {:.4}; Bird_Euc_ls: {:.4}; family_Euc_ls: {:.4}; ' \
                       'Genus_Euc_ls: {:.4}; reg_ls: {:.4}; Wz_ls: {:.4}; G_loss: {:.4}; D_loss_real: {:.4};' \
                       ' D_loss_fake: {:.4}; rl: {:.4}%; fk: {:.4}%' \
                .format(it, Wasserstein_D.item(), Euclidean_loss.item(),
                        Bird_Euclidean_loss.item(),
                        Family_Euclidean_loss.item(),
                        Genus_Euclidean_loss.item(), reg_loss.item(),
                        reg_Wz_loss.item(), G_loss.item(), D_loss_real.item(),
                        D_loss_fake.item(), acc_real * 100, acc_fake * 100)
            print(log_text)

        # ---- Periodic evaluation; refresh best-accuracy / best-AUC files ----
        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()
            eval_fakefeat_test(it, netG, dataset_origin, param, result)
            eval_fakefeat_GZSL(it, netG, dataset_origin, param, result_gzsl)
            if result.save_model:
                files2remove = glob.glob(
                    out_subdir + '/Best_model{}_Acc*'.format(opt.exp_no))
                for _i in files2remove:
                    os.remove(_i)
                torch.save(
                    {
                        'it': it + 1,
                        'state_dict_G': netG.state_dict(),
                        'state_dict_D': netD.state_dict(),
                        'random_seed': opt.manualSeed,
                        'log': log_text,
                    }, out_subdir + '/Best_model{}_Acc_{:.2f}.tar'.format(
                        opt.exp_no, result.acc_list[-1]))

            if result_gzsl.save_model:
                files2remove = glob.glob(
                    out_subdir + '/Best_model{}_Auc*'.format(opt.exp_no))
                for _i in files2remove:
                    os.remove(_i)
                torch.save(
                    {
                        'it': it + 1,
                        'state_dict_G': netG.state_dict(),
                        'state_dict_D': netD.state_dict(),
                        'random_seed': opt.manualSeed,
                        'log': log_text,
                    }, out_subdir + '/Best_model{}_Auc_{:.2f}.tar'.format(
                        opt.exp_no, result_gzsl.best_auc * 100))
            netG.train()

        # ---- Periodic snapshot ----
        if it % opt.save_interval == 0 and it:
            torch.save(
                {
                    'it': it + 1,
                    'state_dict_G': netG.state_dict(),
                    'state_dict_D': netD.state_dict(),
                    'random_seed': opt.manualSeed,
                    'log': log_text,
                }, out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it),
                   'red')

    print("Reproduce CUB {}".format(opt.splitmode))
    print("Accuracy is {:.4}%, and Generalized AUC is {:.4}%".format(
        result.best_acc, result_gzsl.best_auc * 100))
    np.savetxt(opt.history_D_loss_dir + '.txt', history_D_loss, fmt='%.015f')
    np.savetxt(opt.history_G_loss_dir + '.txt', history_G_loss, fmt='%.015f')
batch_size=batch_size, shuffle=True, num_workers=4) print('data is ready!') device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') vi_fea_dim = zsl_dataset.vis_fea_dim se_fea_dim = zsl_dataset.sem_fea_dim n_tr_class = zsl_dataset.n_tr_class z_dim = args.z_dim if args.use_z.lower() == 'true': netG = _netG(se_fea_dim, vi_fea_dim, z_dim).to(device) else: netG = _netG2(se_fea_dim, vi_fea_dim).to(device) netD = _netD(vi_fea_dim, n_tr_class).to(device) netR = Regressor(vi_fea_dim, se_fea_dim).to(device) nets = [netG, netD, netR] nets_weights_init(nets) print_nets(nets) te_data_unseen, te_data_seen = zsl_dataset.get_testData() te_vis_fea_unseen, te_sem_fea_unseen, te_label_unseen, te_labelID_unseen, te_sem_fea_pro_unseen = te_data_unseen te_vis_fea_seen, te_sem_fea_seen, te_label_seen, te_labelID_seen, te_sem_fea_pro_seen = te_data_seen tr_vis_fea, tr_sem_fea, all_tr_label, tr_labelID, tr_sem_fea_pro = zsl_dataset.get_trainData( ) tr_cls_centroid = zsl_dataset.get_tr_centroid() tr_cls_centroid = torch.from_numpy(tr_cls_centroid).to(device) which_optimizer = args.optimizer.lower()
def __init__(self, args):
    """Create data loaders, networks and optimizers for one experiment.

    Args:
        args: option object. Read here: ``saver_root`` (created if missing),
            ``exp`` (experiment name), ``alg``, ``adam``, ``lr`` and, for the
            ``'NW'`` algorithm, ``lrPi``.

    Raises:
        NotImplementedError: ``args.exp`` is ``'VISDA'``, which has no data
            pipeline yet.
        ValueError: ``args.exp`` is not a recognised experiment name.
    """
    self.args = args
    Path(args.saver_root).mkdir(parents=True, exist_ok=True)

    if args.exp == 'MNIST':
        self.log('Running MNIST -> MNIST-M')
        dataloaders = datasets.form_mnist_dataset(args)
    elif args.exp == 'VISDA':
        # TODO: Include VISDA
        # Fail here with a clear message instead of falling through to an
        # unbound-variable error on the first loader lookup below.
        raise NotImplementedError("experiment 'VISDA' is not implemented yet")
    else:
        raise ValueError('unknown experiment: {!r}'.format(args.exp))

    self.s_trainloader = dataloaders['s_train']
    self.s_valloader = dataloaders['s_val']
    self.t_trainloader = dataloaders['t_train']
    self.t_valloader = dataloaders['t_val']
    self.s_trainloader_classwise = dataloaders['s_classwise']
    nclasses = self.nclasses = dataloaders['nclasses']

    # One iterator per class-wise loader. Positional indexing is kept on
    # purpose: the container type of 's_classwise' is not visible from here.
    self.s_classwise_iterators = [
        iter(self.s_trainloader_classwise[i])
        for i in range(len(self.s_trainloader_classwise))
    ]

    ###############################
    # Create models
    self.netF = models._netF().cuda()
    self.netC = models._netC(self.nclasses).cuda()
    if args.alg == 'wasserstein' or args.alg == 'NW':
        self.netD = models._netD_wasserstein().cuda()
    else:
        self.netD = models._netD().cuda()

    # The 'NW' algorithm additionally learns a per-class vector ``pi``,
    # initialised to 1 / nclasses, with its own learning rate.
    uses_pi = args.alg == 'NW'
    if uses_pi:
        self.pi = nn.Parameter(
            torch.FloatTensor(nclasses).fill_(1.0 / nclasses).cuda())

    # Create optimizers
    if args.adam:
        self.optimizerF = optim.Adam(self.netF.parameters(), lr=args.lr,
                                     betas=(0.5, 0.999))
        self.optimizerC = optim.Adam(self.netC.parameters(), lr=args.lr,
                                     betas=(0.5, 0.999))
        self.optimizerD = optim.Adam(self.netD.parameters(), lr=args.lr,
                                     betas=(0.5, 0.999))
        if uses_pi:
            self.optimizerPi = optim.Adam([self.pi], lr=args.lrPi,
                                          betas=(0.5, 0.999))
    else:
        self.optimizerF = optim.SGD(self.netF.parameters(), lr=args.lr,
                                    momentum=0.9)
        self.optimizerC = optim.SGD(self.netC.parameters(), lr=args.lr,
                                    momentum=0.9)
        self.optimizerD = optim.SGD(self.netD.parameters(), lr=args.lr,
                                    momentum=0.9)
        if uses_pi:
            # Unlike the network optimizers, pi is updated without momentum.
            self.optimizerPi = optim.SGD([self.pi], lr=args.lrPi)
def __init__(self, opt, nclasses, mean, std, source_trainloader,
             source_valloader, target_trainloader, target_valloader):
    """Build networks (optionally multi-GPU), restore weights, set up training.

    Args:
        opt: option object; ``opt.loadExisting``, ``opt.checkpoint_dir``,
            ``opt.gpu``, ``opt.lr`` and ``opt.beta1`` are read here.
        nclasses: number of classes, forwarded to the model constructors.
        mean, std: stored unchanged on the instance.
        source_trainloader, source_valloader, target_trainloader,
            target_valloader: stored unchanged on the instance.
    """
    # Caller-supplied state, kept verbatim.
    self.source_trainloader = source_trainloader
    self.source_valloader = source_valloader
    self.target_trainloader = target_trainloader
    self.target_valloader = target_valloader
    self.opt = opt
    self.mean = mean
    self.std = std
    self.best_val = 0

    # Networks.
    self.nclasses = nclasses
    self.netG = models._netG(opt, nclasses)
    self.netD = models._netD(opt, nclasses)
    self.netF = models._netF(opt)
    self.netC = models._netC(opt, nclasses)

    network_names = ('netG', 'netD', 'netF', 'netC')

    # With more than one visible GPU, wrap every network in DataParallel.
    gpu_count = torch.cuda.device_count()
    if gpu_count > 1:
        print("Let's use", gpu_count, "GPUs!")
        for attr in network_names:
            setattr(self, attr, nn.DataParallel(getattr(self, attr)))

    # Weight initialization (after the optional wrapping, G, D, F, C order).
    for attr in network_names:
        getattr(self, attr).apply(utils.weights_init)

    # Optionally overwrite the fresh weights with saved ones. Each file is
    # loaded only if it exists; missing files are skipped silently.
    if opt.loadExisting != 0:
        saved_weights = (
            ('netF', 'model_best_netF_sourceonly.pth'),
            ('netC', 'model_best_netC_sourceonly.pth'),
            ('netG', 'model_best_netG.pth'),
            ('netD', 'model_best_netD.pth'),
        )
        for attr, filename in saved_weights:
            weight_path = os.path.join(opt.checkpoint_dir, filename)
            if os.path.isfile(weight_path):
                getattr(self, attr).load_state_dict(torch.load(weight_path))

    # Loss criterions.
    self.criterion_c = nn.CrossEntropyLoss()
    self.criterion_s = nn.BCELoss()

    if opt.gpu >= 0:
        for module in (self.netD, self.netG, self.netF, self.netC,
                       self.criterion_c, self.criterion_s):
            module.cuda()

    # One Adam optimizer per network, identical hyper-parameters.
    def build_adam(network):
        return optim.Adam(network.parameters(), lr=opt.lr,
                          betas=(opt.beta1, 0.999))

    self.optimizerD = build_adam(self.netD)
    self.optimizerG = build_adam(self.netG)
    self.optimizerF = build_adam(self.netF)
    self.optimizerC = build_adam(self.netC)

    # Other variables.
    self.real_label_val = 1
    self.fake_label_val = 0
def train(creative_weight=1000, model_num=1, is_val=True):
    """Train netG/netD with an extra "creative" loss on blended text features.

    Args:
        creative_weight: weight of the creative term; also stored on
            ``opt.Creative_weight`` and embedded in the output directory name.
        model_num: selects how the creative term is computed from the
            discriminator's class scores on the blended samples:
            1 log-softmax mean, 2-5 parameterised divergences against a
            uniform distribution (2, 4 and 5 learn their parameters through
            ``log_SM_ab``), 6 cross-entropy against an extra class.
        is_val: forwarded to the dataset loader.

    Returns:
        The ``Result`` object, with ``best_acc`` / ``best_auc`` updated from
        the periodic evaluations.

    Exits the process via ``sys.exit(0)`` when ``opt.dataset`` is neither
    ``'CUB'`` nor ``'NAB'``.
    """
    param = _param()
    if opt.dataset == 'CUB':
        dataset = LoadDataset(opt, main_dir, is_val)
        exp_info = 'CUB_EASY' if opt.splitmode == 'easy' else 'CUB_HARD'
    elif opt.dataset == 'NAB':
        dataset = LoadDataset_NAB(opt, main_dir, is_val)
        exp_info = 'NAB_EASY' if opt.splitmode == 'easy' else 'NAB_HARD'
    else:
        print('No Dataset with that name')
        sys.exit(0)
    param.X_dim = dataset.feature_dim
    opt.Creative_weight = creative_weight

    data_layer = FeatDataLayer(dataset.labels_train,
                               dataset.pfc_feat_data_train, opt)
    result = Result()

    # Constant 1x1 input fed to log_SM_ab to read out its learned parameters.
    ones = Variable(torch.Tensor(1, 1))
    ones.data.fill_(1.0)

    netG = _netG(dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    if model_num == 6:
        # One extra output class, used as the target for blended samples.
        netD = _netD(dataset.train_cls_num + 1, dataset.feature_dim).cuda()
    else:
        netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)

    learns_sm_params = model_num == 2 or model_num == 4 or model_num == 5
    if model_num == 2:
        log_SM_ab = Scale(2)
        log_SM_ab = nn.DataParallel(log_SM_ab).cuda()
    elif model_num == 4 or model_num == 5:
        log_SM_ab = Scale(1)
        log_SM_ab = nn.DataParallel(log_SM_ab).cuda()

    exp_params = 'Model_{}_CAN{}_Eu{}_Rls{}_RWz{}_{}'.format(
        model_num, opt.Creative_weight, opt.CENT_LAMBDA, opt.REG_W_LAMBDA,
        opt.REG_Wz_LAMBDA, opt.exp_name)
    out_subdir = main_dir + 'out/{:s}/{:s}'.format(exp_info, exp_params)
    if not os.path.exists(out_subdir):
        os.makedirs(out_subdir)

    log_dir = out_subdir + '/log_{:s}.txt'.format(exp_info)
    with open(log_dir, 'a') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    if learns_sm_params:
        nets = [netG, netD, log_SM_ab]
    else:
        nets = [netG, netD]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    if learns_sm_params:
        optimizer_SM_ab = optim.Adam(log_SM_ab.parameters(), lr=opt.lr,
                                     betas=(0.5, 0.999))

    # The checkpoint writer below stores the latest log line. Bind it up front
    # so a checkpoint written before the first display step cannot hit an
    # unbound name.
    log_text = ''

    for it in tqdm(range(start_step, 3000 + 1)):
        # ---- Creative sample: generate from blended class text features ----
        blobs = data_layer.forward()
        labels = blobs['labels'].astype(int)
        new_class_labels = Variable(
            torch.from_numpy(np.ones_like(labels) *
                             dataset.train_cls_num)).cuda()
        text_feat_1 = np.array(
            [dataset.train_text_feature[i, :] for i in labels])
        text_feat_2 = np.array(
            [dataset.train_text_feature[i, :] for i in labels])
        # Shuffle both features to guarantee different permutations
        np.random.shuffle(text_feat_1)
        np.random.shuffle(text_feat_2)
        # Per-sample blend weight drawn uniformly from [0.2, 0.8).
        alpha = (np.random.random(len(labels)) * (.8 - .2)) + .2

        text_feat_mean = np.multiply(alpha, text_feat_1.transpose())
        text_feat_mean += np.multiply(1. - alpha, text_feat_2.transpose())
        text_feat_mean = text_feat_mean.transpose()
        text_feat_mean = normalize(text_feat_mean, norm='l2', axis=1)
        text_feat_Creative = Variable(
            torch.from_numpy(text_feat_mean.astype('float32'))).cuda()
        z_creative = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()
        G_creative_sample = netG(z_creative, text_feat_Creative)

        # ---- Discriminator: five updates per outer iteration ----
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array(
                [dataset.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # D loss on real features
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = -D_loss_real + C_loss_real
            DC_loss.backward()

            # D loss on generated features (detached: netG is not updated here)
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = D_loss_fake + C_loss_fake
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)

        # ---- Generator: one update per outer iteration ----
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array(
                [dataset.train_text_feature[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2
            GC_loss = -G_loss + C_loss

            # Centroid loss: distance between the mean generated feature of
            # each class in the batch and that class's training centroid.
            # Gated on CENT_LAMBDA (its own weight); the previous REG_W_LAMBDA
            # gate silently disabled it whenever weight regularisation was off.
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.CENT_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) -
                            tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            # ||W_z||21 regularization, make W_z sparse
            reg_Wz_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_Wz_LAMBDA != 0:
                Wz = netG.rdc_text.weight
                reg_Wz_loss = Wz.pow(2).sum(dim=0).sqrt().sum().mul(
                    opt.REG_Wz_LAMBDA)

            # D(C| GX_fake)) + Classify GX_fake as real
            D_creative_fake, C_creative_fake = netD(G_creative_sample)
            if model_num == 1:  # KL Divergence
                G_fake_C = F.log_softmax(C_creative_fake)
            else:
                G_fake_C = F.softmax(C_creative_fake)

            # Models 2-5 compare G_fake_C against a uniform distribution of
            # the same size.
            if model_num in (2, 3, 4, 5):
                q_shape = Variable(
                    torch.FloatTensor(G_fake_C.data.size(0),
                                      G_fake_C.data.size(1))).cuda()
                q_shape.data.fill_(1.0 / G_fake_C.data.size(1))

            if model_num == 1:  # KL Divergence
                entropy_GX_fake = (G_fake_C / G_fake_C.data.size(1)).mean()
            elif model_num == 2:  # SM Divergence
                # Both parameters are learned and mapped into (0.2, 0.8).
                SM_ab = F.sigmoid(log_SM_ab(ones))
                SM_a = 0.2 + torch.div(SM_ab[0][0], 1.6666666666666667).cuda()
                SM_b = 0.2 + torch.div(SM_ab[0][1], 1.6666666666666667).cuda()
                pow_a_b = torch.div(1 - SM_a, 1 - SM_b)
                alpha_term = (torch.pow(G_fake_C + 1e-5, SM_a) *
                              torch.pow(q_shape, 1 - SM_a)).sum(1)
                entropy_GX_fake_vec = torch.div(
                    torch.pow(alpha_term, pow_a_b) - 1, SM_b - 1)
            elif model_num == 3:  # Bhattacharyya divergence (fixed parameters)
                SM_a = Variable(torch.FloatTensor(1, 1)).cuda()
                SM_a.data.fill_(opt.SM_Alpha)
                SM_b = Variable(torch.FloatTensor(1, 1)).cuda()
                SM_b.data.fill_(opt.SM_Alpha)
                pow_a_b = torch.div(1 - SM_a, 1 - SM_b)
                alpha_term = (torch.pow(G_fake_C + 1e-5, SM_a) *
                              torch.pow(q_shape, 1 - SM_a)).sum(1)
                entropy_GX_fake_vec = -torch.div(
                    torch.pow(alpha_term, pow_a_b) - 1, SM_b - 1)
            elif model_num == 4:  # Tsallis Divergence
                SM_ab = F.sigmoid(log_SM_ab(ones))
                SM_a = 0.2 + torch.div(SM_ab[0][0], 1.6666666666666667).cuda()
                SM_b = SM_a
                pow_a_b = torch.div(1 - SM_a, 1 - SM_b)
                alpha_term = (torch.pow(G_fake_C + 1e-5, SM_a) *
                              torch.pow(q_shape, 1 - SM_a)).sum(1)
                entropy_GX_fake_vec = -torch.div(
                    torch.pow(alpha_term, pow_a_b) - 1, SM_b - 1)
            elif model_num == 5:  # Renyi Divergence
                SM_ab = F.sigmoid(log_SM_ab(ones))
                SM_a = 0.2 + torch.div(SM_ab[0][0], 1.6666666666666667).cuda()
                SM_b = Variable(torch.FloatTensor(1, 1)).cuda()
                SM_b.data.fill_(opt.SM_Beta)
                pow_a_b = torch.div(1 - SM_a, 1 - SM_b)
                alpha_term = (torch.pow(G_fake_C + 1e-5, SM_a) *
                              torch.pow(q_shape, 1 - SM_a)).sum(1)
                entropy_GX_fake_vec = -torch.div(
                    torch.pow(alpha_term, pow_a_b) - 1, SM_b - 1)

            if model_num == 6:
                loss_creative = F.cross_entropy(C_creative_fake,
                                                new_class_labels)
            else:
                if model_num != 1:
                    # Normalize SM-Divergence & Report mean
                    min_e, max_e = torch.min(entropy_GX_fake_vec), torch.max(
                        entropy_GX_fake_vec)
                    entropy_GX_fake_vec = (entropy_GX_fake_vec -
                                           min_e) / (max_e - min_e)
                    entropy_GX_fake = -entropy_GX_fake_vec.mean()
                loss_creative = -opt.Creative_weight * entropy_GX_fake

            disc_GX_fake_real = -torch.mean(D_creative_fake)
            total_loss_creative = loss_creative + disc_GX_fake_real

            all_loss = GC_loss + Euclidean_loss + reg_loss + reg_Wz_loss + total_loss_creative
            all_loss.backward()
            if learns_sm_params:
                optimizer_SM_ab.step()
            optimizerG.step()
            reset_grad(nets)

        # ---- Periodic logging (uses tensors from the last generator step) ----
        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])

            log_text = 'Iter-{}; rl: {:.4}%; fk: {:.4}%'.format(
                it, acc_real * 100, acc_fake * 100)
            with open(log_dir, 'a') as f:
                f.write(log_text + '\n')

        # ---- Periodic evaluation ----
        if it % opt.evl_interval == 0 and it > opt.disp_interval:
            netG.eval()
            cur_acc = eval_fakefeat_test(it, netG, dataset, param, result)
            cur_auc = eval_fakefeat_GZSL(netG, dataset, param, out_subdir,
                                         result)

            if cur_acc > result.best_acc:
                result.best_acc = cur_acc

            if cur_auc > result.best_auc:
                result.best_auc = cur_auc

                # NOTE(review): the source this was rebuilt from had lost its
                # indentation. The save is assumed to belong to the "new best
                # AUC" branch because it writes Best_model_AUC_*; confirm.
                # NOTE(review): the condition below is true when `it` is NOT
                # a multiple of save_interval; confirm this is intended.
                if it % opt.save_interval:
                    files2remove = glob.glob(out_subdir + '/Best_model*')
                    for _i in files2remove:
                        os.remove(_i)
                    torch.save(
                        {
                            'it': it + 1,
                            'state_dict_G': netG.state_dict(),
                            'state_dict_D': netD.state_dict(),
                            'random_seed': opt.manualSeed,
                            'log': log_text,
                        },
                        out_subdir + '/Best_model_AUC_{:.2f}.tar'.format(cur_auc))
            netG.train()
    return result
def _draw_train_batch(data_layer, dataset):
    """Fetch the next training mini-batch and move it to the GPU.

    Args:
        data_layer: object whose ``forward()`` returns a dict with the keys
            ``'data'`` and ``'labels'`` (numpy arrays).
        dataset: object exposing ``train_att``, indexed by class label.

    Returns:
        Tuple ``(X, text_feat, y_true)`` of CUDA Variables: the batch
        features from ``blobs['data']``, the ``dataset.train_att`` row picked
        for each sample's label, and the integer class labels.
    """
    blobs = data_layer.forward()
    feat_data = blobs['data']  # image data
    labels = blobs['labels'].astype(int)  # class labels
    text_feat = np.array([dataset.train_att[i, :] for i in labels])
    text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()
    X = Variable(torch.from_numpy(feat_data)).cuda()
    y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
    return X, text_feat, y_true


def train():
    """Train netG/netD and netG2/netD2 jointly with a cycle loss.

    Each iteration runs five phases:
      1. five critic updates of ``netD`` (real/fake score, auxiliary
         classification, gradient penalty),
      2. one update of ``netG`` (adversarial + classification + centroid +
         weight regularisation),
      3. five critic updates of ``netD2`` on ``text_feat`` versus the output
         of ``netG2(z2, netG(z, text_feat))``,
      4. one update of ``netG2`` fed with real features ``X``,
      5. five joint updates of ``netG`` and ``netG2`` on the MSE between
         ``text_feat`` and ``netG2(z2, netG(z, text_feat))``.

    Progress is appended to a log file under ``out/``; the best ZSL / GZSL
    checkpoints and periodic snapshots are written with ``save_model``.
    All configuration is read from the module-level ``opt``. Requires CUDA.
    """
    param = _param()
    dataset = DATA_LOADER(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result = Result()
    result_gzsl = Result()

    netG = _netG_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    netG2 = _netG2_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG2.apply(weights_init)
    print(netG2)
    netD2 = _netD2_att(dataset.text_dim, dataset.train_cls_num).cuda()
    netD2.apply(weights_init)
    print(netD2)

    exp_info = 'GBU_{}'.format(opt.dataset)
    exp_params = 'Eu{}_Rls{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA)

    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)
    # makedirs creates 'out' and 'out/<exp_info>' on the way if missing.
    if not os.path.exists(out_subdir):
        os.makedirs(out_subdir)

    cprint(" The output dictionary is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}_{}.txt'.format(exp_info, opt.exp_idx)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            # NOTE: torch.load unpickles the file; resume only from
            # checkpoints you trust.
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            netG2.load_state_dict(checkpoint['state_dict_G2'])
            netD2.load_state_dict(checkpoint['state_dict_D2'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Every network must be listed exactly once: reset_grad(nets) is the only
    # place gradients are cleared, so a missing netG2 would let its gradients
    # pile up across the G2 and cycle-loss updates.
    nets = [netG, netD, netG2, netD2]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    optimizerD2 = optim.Adam(netD2.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG2 = optim.Adam(netG2.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    # save_model() receives the latest log line; bind it up front so that an
    # eval/save iteration which is not also a display iteration cannot hit an
    # unbound name.
    log_text = ''

    for it in range(start_step, 10000 + 1):
        # ---------------- Discriminator (netD) ----------------
        for _ in range(5):
            X, text_feat, y_true = _draw_train_batch(data_layer, dataset)
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            # Real batch: critic score + auxiliary classification.
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = opt.Adv_LAMBDA * (-D_loss_real + C_loss_real)
            DC_loss.backward()

            # Fake batch: detached so only netD receives gradients.
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = opt.Adv_LAMBDA * (D_loss_fake + C_loss_fake)
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = opt.Adv_LAMBDA * calc_gradient_penalty(
                netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)

        # ---------------- Generator (netG) ----------------
        X, text_feat, y_true = _draw_train_batch(data_layer, dataset)
        z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

        G_sample = netG(z, text_feat)
        D_fake, C_fake = netD(G_sample)
        _, C_real = netD(X)

        # Keep this batch's logits and labels for the accuracy report: the
        # later phases draw fresh batches, and comparing these logits with
        # another batch's labels would make the logged accuracy meaningless.
        log_C_real = C_real.data
        log_C_fake = C_fake.data
        log_y_true = y_true.data

        # GAN's G loss
        G_loss = torch.mean(D_fake)
        # Auxiliary classification loss
        C_loss = (F.cross_entropy(C_real, y_true) +
                  F.cross_entropy(C_fake, y_true)) / 2
        GC_loss = opt.Adv_LAMBDA * (-G_loss + C_loss)

        # Centroid loss: distance between the mean generated feature of each
        # class present in the batch and that class's training centroid.
        # Gated on its own weight (CENT_LAMBDA), not on the weight-decay one.
        Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
        if opt.CENT_LAMBDA != 0:
            for i in range(dataset.train_cls_num):
                # reshape(-1) keeps a 1-D index even for a single match;
                # squeeze() would yield a 0-dim index, drop the batch axis
                # and turn mean(dim=0) into a scalar.
                sample_idx = (y_true == i).data.nonzero().reshape(-1)
                if sample_idx.numel() == 0:
                    continue
                G_sample_cls = G_sample[sample_idx, :]
                Euclidean_loss += (
                    G_sample_cls.mean(dim=0) -
                    tr_cls_centroid[i]).pow(2).sum().sqrt()
            Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

        # ||W||_2 regularization
        reg_loss = Variable(torch.Tensor([0.0])).cuda()
        if opt.REG_W_LAMBDA != 0:
            for name, p in netG.named_parameters():
                if 'weight' in name:
                    reg_loss += p.pow(2).sum()
            reg_loss.mul_(opt.REG_W_LAMBDA)

        all_loss = GC_loss + Euclidean_loss + reg_loss
        all_loss.backward()
        optimizerG.step()
        reset_grad(nets)

        # ---------------- D2 (netD2) ----------------
        for _ in range(5):
            _, text_feat, _ = _draw_train_batch(data_layer, dataset)
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()
            # NOTE(review): this phase sizes z2 with param.z_dim while the
            # cycle phase uses opt.z_dim - presumably equal; confirm.
            z2 = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # G1 results
            visual_sample = netG(z, text_feat)

            # real loss
            D2_real = netD2(text_feat)
            D2_loss_real = torch.mean(D2_real)
            DC2_loss = -D2_loss_real
            DC2_loss.backward()

            # fake loss (detached: only netD2 is updated in this phase)
            text_sample = netG2(z2, visual_sample).detach()
            D2_fake = netD2(text_sample)
            D2_loss_fake = torch.mean(D2_fake)
            DC2_loss = D2_loss_fake
            DC2_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty1(netD2, text_feat.data,
                                                  text_sample.data)
            grad_penalty.backward()

            Wasserstein_D2 = D2_loss_real - D2_loss_fake
            optimizerD2.step()
            reset_grad(nets)

        # ---------------- G2 (netG2) ----------------
        X, _, _ = _draw_train_batch(data_layer, dataset)
        z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

        text_sample = netG2(z, X)
        D2_fake = netD2(text_sample)

        # GAN's G loss
        G2_loss = torch.mean(D2_fake)
        GC2_loss = -G2_loss

        # ||W||_2 regularization
        reg_loss2 = Variable(torch.Tensor([0.0])).cuda()
        if opt.REG_W_LAMBDA != 0:
            for name, p in netG2.named_parameters():
                if 'weight' in name:
                    reg_loss2 += p.pow(2).sum()
            reg_loss2.mul_(opt.REG_W_LAMBDA)

        all_loss = GC2_loss + reg_loss2
        all_loss.backward()
        optimizerG2.step()
        reset_grad(nets)

        # ---------------- Cycle loss (netG + netG2) ----------------
        for _ in range(5):
            _, text_feat, _ = _draw_train_batch(data_layer, dataset)
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()
            z2 = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            # text -> generated feature -> text should reproduce the input.
            G_sample = netG(z, text_feat)
            text_sample = netG2(z2, G_sample)
            cycle_loss = 10 * torch.nn.MSELoss()(text_feat, text_sample)
            cycle_loss.backward()
            optimizerG.step()
            optimizerG2.step()
            reset_grad(nets)

        # ---------------- Logging ----------------
        if it % opt.disp_interval == 0 and it:
            y_true_np = log_y_true.cpu().numpy()
            batch_n = float(y_true_np.shape[0])
            acc_real = (np.argmax(log_C_real.cpu().numpy(), axis=1) ==
                        y_true_np).sum() / batch_n
            acc_fake = (np.argmax(log_C_fake.cpu().numpy(), axis=1) ==
                        y_true_np).sum() / batch_n

            log_text = 'Iter-{}; Was_D: {:.3f}; Was_D2: {:.3f}; Euc_ls: {:.3f}; reg_ls: {:.3f}; reg_ls2: {:.3f}; \n' \
                       'G_loss: {:.3f};G2_loss: {:.3f}; D_loss_real: {:.3f};D2_loss_real: {:.3f}; D_loss_fake: {:.3f};' \
                       'D2_loss_fake: {:.3f}; rl: {:.2f}%; fk: {:.2f}%;cycle: {:.3f} \n'\
                .format(it, Wasserstein_D.item(), Wasserstein_D2.item(),
                        Euclidean_loss.item(), reg_loss.item(),
                        reg_loss2.item(), G_loss.item(), G2_loss.item(),
                        D_loss_real.item(), D2_loss_real.item(),
                        D_loss_fake.item(), D2_loss_fake.item(),
                        acc_real * 100, acc_fake * 100, cycle_loss.item())
            print(log_text)
            with open(log_dir, 'a') as f:
                f.write(log_text + '\n')

        # ---------------- Evaluation / best-model checkpoints ----------------
        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()

            eval_fakefeat_test(it, netG, dataset, param, result)
            if result.save_model:
                # Only one best-ZSL checkpoint is kept on disk.
                for stale in glob.glob(out_subdir + '/Best_model_ZSL_*'):
                    os.remove(stale)
                save_model(
                    it, netG, netD, netG2, netD2, opt.manualSeed, log_text,
                    out_subdir + '/Best_model_ZSL_Acc_{:.2f}.tar'.format(
                        result.acc_list[-1]))

            eval_fakefeat_test_gzsl(it, netG, dataset, param, result_gzsl)
            # The GZSL checkpoint follows the GZSL tracker, not the ZSL one.
            if result_gzsl.save_model:
                for stale in glob.glob(out_subdir + '/Best_model_GZSL_*'):
                    os.remove(stale)
                save_model(
                    it, netG, netD, netG2, netD2, opt.manualSeed, log_text,
                    out_subdir +
                    '/Best_model_GZSL_H_{:.2f}_S_{:.2f}_U_{:.2f}.tar'.format(
                        result_gzsl.best_acc, result_gzsl.best_acc_S_T,
                        result_gzsl.best_acc_U_T))

            netG.train()

        # ---------------- Periodic snapshot ----------------
        if it % opt.save_interval == 0 and it:
            save_model(it, netG, netD, netG2, netD2, opt.manualSeed, log_text,
                       out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it),
                   'red')