def sample(self, datapath, stage=1):
    """Generate one image per text embedding stored in a torchfile.

    The stage-I generator is loaded when ``stage == 1`` and the stage-II
    generator otherwise. Embeddings are fed through it in batches together
    with fresh Gaussian noise, and every output image is written to
    ``<save_dir>/<index>.png``, where ``save_dir`` is ``cfg.NET_G`` cut at
    its first ``'.pth'``.

    Args:
        datapath: path of the torchfile providing ``raw_txt`` and
            ``fea_txt``.
        stage: ``1`` selects ``load_network_stageI``; any other value
            selects ``load_network_stageII``.
    """
    load_network = (self.load_network_stageI if stage == 1
                    else self.load_network_stageII)
    netG, _ = load_network()
    netG.eval()

    # Text embeddings generated by the encoder.
    text_data = torchfile.load(datapath)
    captions_list = text_data.raw_txt
    embeddings = np.concatenate(text_data.fea_txt, axis=0)
    num_embeddings = len(captions_list)
    print('Successfully load sentences from: ', datapath)
    print('Total number of sentences:', num_embeddings)
    print('num_embeddings:', num_embeddings, embeddings.shape)

    # Folder that receives the generated samples.
    save_dir = cfg.NET_G[:cfg.NET_G.find('.pth')]
    mkdir_p(save_dir)

    batch_size = np.minimum(num_embeddings, self.batch_size)
    noise = Variable(torch.FloatTensor(batch_size, cfg.Z_DIM))
    if cfg.CUDA:
        noise = noise.cuda()

    count = 0
    # Stop when every embedding was used or once count exceeds 3000.
    while count < num_embeddings and count <= 3000:
        if count + batch_size > num_embeddings:
            # Last window would overrun: slide it back so it stays
            # exactly batch_size long (re-generating a few images).
            count = num_embeddings - batch_size
        iend = count + batch_size

        txt_embedding = Variable(torch.FloatTensor(embeddings[count:iend]))
        if cfg.CUDA:
            txt_embedding = txt_embedding.cuda()

        #######################################################
        # (2) Generate fake images
        ######################################################
        noise.data.normal_(0, 1)
        _, fake_imgs, _mu, _logvar = nn.parallel.data_parallel(
            netG, (txt_embedding, noise), self.gpus)

        for offset in range(batch_size):
            pixels = fake_imgs[offset].data.cpu().numpy()
            # Affine map sending -1 to 0 and 1 to 255, then cast to bytes.
            pixels = ((pixels + 1.0) * 127.5).astype(np.uint8)
            # Move axis 0 to the end before handing the array to PIL.
            pixels = np.transpose(pixels, (1, 2, 0))
            Image.fromarray(pixels).save(
                '%s/%d.png' % (save_dir, count + offset))
        count += batch_size
def evaluate(self, split_dir):
    """Sample images from the generator checkpoint ``cfg.TRAIN.NET_G``.

    The checkpoint is loaded into a ``DataParallel``-wrapped ``G_NET`` and
    ``cfg.TEST.SAMPLE_NUM / batch_size`` noise batches are pushed through
    it. The last element of each generator output is passed to
    ``save_superimages`` or ``save_singleimages`` depending on
    ``cfg.TEST.B_EXAMPLE``.

    Images go under ``<checkpoint dir>/iteration<N>/<split_dir>/super`` or
    ``.../single``, where ``N`` is the integer between the last ``'_'`` and
    the last ``'.'`` of the checkpoint path.

    Args:
        split_dir: name of the sub-folder used below ``iteration<N>``.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for models is not found!')
        return

    # Build and load the generator
    netG = G_NET()
    netG.apply(weights_init)
    netG = torch.nn.DataParallel(netG, device_ids=self.gpus)
    print(netG)
    # map_location keeps every tensor on the CPU while loading.
    state_dict = torch.load(cfg.TRAIN.NET_G,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load ', cfg.TRAIN.NET_G)

    # the path to save generated images
    s_tmp = cfg.TRAIN.NET_G
    istart = s_tmp.rfind('_') + 1
    iend = s_tmp.rfind('.')
    iteration = int(s_tmp[istart:iend])
    s_tmp = s_tmp[:s_tmp.rfind('/')]
    save_dir = '%s/iteration%d/%s' % (s_tmp, iteration, split_dir)
    if cfg.TEST.B_EXAMPLE:
        folder = '%s/super' % (save_dir)
    else:
        folder = '%s/single' % (save_dir)
    print('Make a new folder: ', folder)
    mkdir_p(folder)

    nz = cfg.GAN.Z_DIM
    noise = Variable(torch.FloatTensor(self.batch_size, nz))
    if cfg.CUDA:
        netG.cuda()
        noise = noise.cuda()

    # switch to evaluate mode
    netG.eval()
    num_batches = int(cfg.TEST.SAMPLE_NUM / self.batch_size)
    cnt = 0
    # range() instead of xrange(): the latter does not exist on Python 3.
    for _ in range(num_batches):
        noise.data.normal_(0, 1)
        fake_imgs, _, _ = netG(noise)
        if cfg.TEST.B_EXAMPLE:
            self.save_superimages(fake_imgs[-1], folder, cnt, 256)
        else:
            self.save_singleimages(fake_imgs[-1], folder, cnt, 256)
        cnt += self.batch_size
def save_singleimages(self, images, filenames, save_dir, split_dir,
                      sentenceID, imsize):
    """Write every image of a batch to its own PNG file.

    Image ``i`` is saved as
    ``<save_dir>/single_samples/<split_dir>/<filenames[i]>_<imsize>_sentence<sentenceID>.png``;
    missing parent folders are created first.

    Args:
        images: batch indexed along dimension 0.
        filenames: per-image path stems (may contain ``/``).
        save_dir: root output folder.
        split_dir: sub-folder name below ``single_samples``.
        sentenceID: integer embedded in the file name.
        imsize: integer embedded in the file name.
    """
    for idx in range(images.size(0)):
        stem = '%s/single_samples/%s/%s' % (save_dir, split_dir,
                                            filenames[idx])
        parent = stem.rsplit('/', 1)[0]
        if not os.path.isdir(parent):
            print('Make a new folder: ', parent)
            mkdir_p(parent)

        # Affine map sending -1 to 0 and 1 to 255, clamped, as bytes.
        as_bytes = images[idx].add(1).div(2).mul(255).clamp(0, 255).byte()
        # Move axis 0 to the end before handing the array to PIL.
        pixels = as_bytes.permute(1, 2, 0).data.cpu().numpy()
        target = '%s_%d_sentence%d.png' % (stem, imsize, sentenceID)
        Image.fromarray(pixels).save(target)
def save_superimages(self, images_list, filenames, save_dir, split_dir,
                     imsize):
    """Save, for each batch item, a grid with one tile per sentence.

    For item ``i`` the ``i``-th image of every entry in ``images_list`` is
    reshaped to ``(1, 3, imsize, imsize)``, the tiles are concatenated and
    written with ``vutils.save_image`` (10 tiles per row, normalized) to
    ``<save_dir>/super/<split_dir>/<filenames[i]>_<imsize>.png``.

    Args:
        images_list: sequence of image batches, one per sentence.
        filenames: per-item path stems (may contain ``/``).
        save_dir: root output folder.
        split_dir: sub-folder name below ``super``.
        imsize: tile edge length; also embedded in the file name.
    """
    batch_size = images_list[0].size(0)
    for i in range(batch_size):
        s_tmp = '%s/super/%s/%s' % (save_dir, split_dir, filenames[i])
        folder = s_tmp[:s_tmp.rfind('/')]
        if not os.path.isdir(folder):
            print('Make a new folder: ', folder)
            mkdir_p(folder)

        # This assignment must be live code: save_image below needs it.
        savename = '%s_%d.png' % (s_tmp, imsize)
        tiles = [batch[i].view(1, 3, imsize, imsize)
                 for batch in images_list]
        super_img = torch.cat(tiles, 0)
        vutils.save_image(super_img, savename, nrow=10, normalize=True)
def __init__(self, output_dir):
    """Prepare output folders (training only), schedule and GPU state.

    Args:
        output_dir: root folder; ``Model``, ``Image`` and ``Log``
            sub-folders are created beneath it and a ``FileWriter`` is
            opened on the log folder when ``cfg.TRAIN.FLAG`` is truthy.
    """
    if cfg.TRAIN.FLAG:
        self.model_dir, self.image_dir, self.log_dir = (
            os.path.join(output_dir, leaf)
            for leaf in ('Model', 'Image', 'Log'))
        for folder in (self.model_dir, self.image_dir, self.log_dir):
            mkdir_p(folder)
        self.summary_writer = FileWriter(self.log_dir)

    self.max_epoch = cfg.TRAIN.MAX_EPOCH
    self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL

    # cfg.GPU_ID is a comma-separated string of integer device ids.
    self.gpus = list(map(int, cfg.GPU_ID.split(',')))
    self.num_gpus = len(self.gpus)
    # The configured batch size is multiplied by the number of GPUs.
    self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
    torch.cuda.set_device(self.gpus[0])
    cudnn.benchmark = True
def __init__(self, output_dir, data_loader, imsize):
    """Prepare output folders, GPU state, schedule and the data loader.

    Args:
        output_dir: root folder; ``Model``, ``Image`` and ``Log``
            sub-folders are created beneath it and a ``FileWriter`` is
            opened on the log folder when ``cfg.TRAIN.FLAG`` is truthy.
        data_loader: sized iterable of batches; its length is stored as
            ``num_batches``.
        imsize: accepted for interface compatibility; not read here.
    """
    if cfg.TRAIN.FLAG:
        self.model_dir, self.image_dir, self.log_dir = (
            os.path.join(output_dir, leaf)
            for leaf in ('Model', 'Image', 'Log'))
        for folder in (self.model_dir, self.image_dir, self.log_dir):
            mkdir_p(folder)
        self.summary_writer = FileWriter(self.log_dir)

    # cfg.GPU_ID is a comma-separated string of integer device ids.
    self.gpus = list(map(int, cfg.GPU_ID.split(',')))
    self.num_gpus = len(self.gpus)
    torch.cuda.set_device(self.gpus[0])
    cudnn.benchmark = True

    # The configured batch size is multiplied by the number of GPUs.
    self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
    self.max_epoch = cfg.TRAIN.MAX_EPOCH
    self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL

    self.data_loader = data_loader
    self.num_batches = len(self.data_loader)
def __init__(self, output_dir):
    """Prepare folders, GPU state, frozen word embeddings and visdom panes.

    Args:
        output_dir: root folder; ``Model``, ``Image`` and ``Log``
            sub-folders are created beneath it and a ``FileWriter`` is
            opened on the log folder when ``cfg.TRAIN.FLAG`` is truthy.
    """
    if cfg.TRAIN.FLAG:
        self.model_dir, self.image_dir, self.log_dir = (
            os.path.join(output_dir, leaf)
            for leaf in ('Model', 'Image', 'Log'))
        for folder in (self.model_dir, self.image_dir, self.log_dir):
            mkdir_p(folder)
        self.summary_writer = FileWriter(self.log_dir)

    self.max_epoch = cfg.TRAIN.MAX_EPOCH
    self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL

    # cfg.GPU_ID is a comma-separated string of integer device ids.
    self.gpus = list(map(int, cfg.GPU_ID.split(',')))
    self.num_gpus = len(self.gpus)
    self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
    torch.cuda.set_device(self.gpus[0])
    cudnn.benchmark = True

    # External word vectors, e.g. "../data/birds/birds.en.vec".
    vec_path = os.path.join(cfg.DATA_DIR, cfg.DATASET_NAME + ".en.vec")
    self.txt_dico, pretrained = load_external_embeddings(vec_path)
    # 300-wide embedding table filled with the loaded vectors and frozen.
    embedding = nn.Embedding(len(self.txt_dico), 300, sparse=False)
    embedding.weight.data.copy_(pretrained)
    embedding.weight.requires_grad = False
    self.txt_emb = embedding

    self.vis = visdom.Visdom(server='http://bvisionserver9.cs.unc.edu',
                             port=8088, env="birds_spv2")
    # Three image panes seeded with all-ones placeholders, one text pane.
    for pane in ('vis_win1', 'vis_win2', 'vis_win3'):
        setattr(self, pane, self.vis.images(np.ones((64, 3, 64, 64))))
    self.vis_txt1 = self.vis.text('')
def __init__(self, output_dir, data_loader, dataset):
    """Set up output folders, schedule, vocabulary data and a VGG model.

    Args:
        output_dir: root folder; ``Model``, ``Image`` and ``Score``
            sub-folders are created beneath it when ``cfg.TRAIN.FLAG`` is
            truthy.
        data_loader: sized iterable of batches; its length is stored as
            ``num_batches``.
        dataset: object providing ``n_words``, ``ixtoword``, ``cats_dict``
            and ``cats_index_dict``, which are copied onto ``self``.
    """
    # NOTE(review): this definition was flattened onto one line, so the
    # nesting of the `if` blocks below is reconstructed -- confirm it
    # against the original file.
    if cfg.TRAIN.FLAG:
        self.model_dir = os.path.join(output_dir, 'Model')
        self.image_dir = os.path.join(output_dir, 'Image')
        self.score_dir = os.path.join(output_dir, 'Score')
        mkdir_p(self.model_dir)
        mkdir_p(self.image_dir)
        mkdir_p(self.score_dir)

    # With exactly one non-negative GPU id configured, device 0 is selected.
    # NOTE(review): the index is the literal 0, not cfg.GPU_IDS[0] --
    # presumably device visibility is restricted elsewhere; confirm.
    if len(cfg.GPU_IDS) == 1 and cfg.GPU_IDS[0] >= 0:
        torch.cuda.set_device(0)
    cudnn.benchmark = True

    self.batch_size = cfg.TRAIN.BATCH_SIZE
    self.max_epoch = cfg.TRAIN.MAX_EPOCH
    self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL
    self.print_interval = cfg.TRAIN.PRINT_INTERVAL
    self.display_interval = cfg.TRAIN.DISPLAY_INTERVAL

    # Vocabulary and category lookups copied from the dataset object.
    self.n_words = dataset.n_words
    self.ixtoword = dataset.ixtoword
    self.cats_dict = dataset.cats_dict
    self.cats_index_dict = dataset.cats_index_dict

    self.data_loader = data_loader
    self.num_batches = len(self.data_loader)

    self.device = torch.device("cuda" if cfg.CUDA else "cpu")

    # Pretrained VGG-19 (batch-norm variant), kept in eval mode.
    self.vgg_model = vgg19_bn(pretrained=True)
    if cfg.CUDA:
        self.vgg_model = self.vgg_model.cuda()
        # Wrap for multi-GPU use only when several ids are configured.
        if len(cfg.GPU_IDS) > 1:
            self.vgg_model = nn.DataParallel(self.vgg_model)
    self.vgg_model.to(self.device)
    self.vgg_model.eval()
def genDiscOutputs(self, split_dir, num_samples=57140):
    """Run generated images through the discriminator's partial forward.

    Loads the generator and the third ``netD`` entry from the checkpoint
    ``cfg.TRAIN.NET_G`` and the text encoder from ``cfg.TRAIN.NET_E``.
    For ``max(num_samples // batch_size, 1)`` batches of
    ``self.data_loader`` it generates images and calls
    ``netD.partial_forward`` on them.

    Args:
        split_dir: split name; ``'test'`` is remapped to ``'valid'``. It
            names the folder created under ``../output/<checkpoint stem>``.
        num_samples: approximate number of samples to process.
    """
    if cfg.TRAIN.NET_G == '':
        logger.error('Error: the path for models is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    # Build the generator (weights are loaded further down).
    netG = G_DCGAN() if cfg.GAN.B_DCGAN else G_NET()
    netG.apply(weights_init)
    netG.to(cfg.DEVICE)
    netG.eval()

    text_encoder = RNN_ENCODER(self.n_words,
                               nhidden=cfg.TEXT.EMBEDDING_DIM)
    # HACK (kept from the original): map_location loads onto the CPU first.
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    text_encoder = text_encoder.to(cfg.DEVICE)
    text_encoder.eval()
    logger.info('Loaded text encoder from: %s', cfg.TRAIN.NET_E)

    batch_size = self.batch_size[0]
    nz = cfg.GAN.GLOBAL_Z_DIM
    noise = Variable(torch.FloatTensor(batch_size, nz)).to(cfg.DEVICE)
    local_noise = Variable(
        torch.FloatTensor(batch_size, cfg.GAN.LOCAL_Z_DIM)).to(cfg.DEVICE)

    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict["netG"])
    # Distinct loop name: `keys` is reused by the batch unpack below.
    for checkpoint_key in state_dict.keys():
        print(checkpoint_key)
    logger.info('Load G from: %s', model_dir)
    max_objects = 3

    from model import D_NET256
    netD = D_NET256()
    netD.load_state_dict(state_dict["netD"][2])
    # The discriminator receives tensors that live on cfg.DEVICE, so it is
    # moved there too (the original left it on the CPU).
    netD.to(cfg.DEVICE)
    netD.eval()

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')].split("/")[-1]
    save_dir = '%s/%s/%s' % ("../output", s_tmp, split_dir)
    mkdir_p(save_dir)
    logger.info("Saving images to: {}".format(save_dir))

    number_batches = max(num_samples // batch_size, 1)

    data_iter = iter(self.data_loader)
    real_labels, fake_labels, match_labels = self.prepare_labels()

    for step in tqdm(range(number_batches)):
        # Built-in next() works on every iterator; `.next()` is the
        # Python 2 spelling and is missing from Python 3 iterators.
        data = next(data_iter)

        (imgs, captions, cap_lens, class_ids, keys,
         transformation_matrices, label_one_hot, _) = prepare_data(
            data, eval=True)

        transf_matrices = transformation_matrices[0]
        transf_matrices_inv = transformation_matrices[1]

        hidden = text_encoder.init_hidden(batch_size)
        # words_embs: batch_size x nef x seq_len
        # sent_emb: batch_size x nef
        words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
        words_embs, sent_emb = words_embs.detach(), sent_emb.detach()
        mask = (captions == 0)
        num_words = words_embs.size(2)
        if mask.size(1) > num_words:
            mask = mask[:, :num_words]

        #######################################################
        # (2) Generate fake images
        ######################################################
        noise.data.normal_(0, 1)
        local_noise.data.normal_(0, 1)
        inputs = (noise, local_noise, sent_emb, words_embs, mask,
                  transf_matrices, transf_matrices_inv, label_one_hot,
                  max_objects)
        # Tensors go to the target device; other values pass through.
        inputs = tuple(
            inp.to(cfg.DEVICE) if isinstance(inp, torch.Tensor) else inp
            for inp in inputs)

        with torch.no_grad():
            fake_imgs, _, mu, logvar = netG(*inputs)
            inputs = (fake_imgs, fake_labels, transf_matrices,
                      transf_matrices_inv, max_objects)
            # Was `netsD[-1]`, a name never defined in this method; the
            # discriminator loaded above is `netD`.
            codes = netD.partial_forward(*inputs)
def sampling(self, split_dir):
    """Generate images for a split and print R-precision mean and std.

    For every batch of ``self.data_loader`` the final-stage generator
    output is saved as ``<save_dir>/single/<key>_s-1_<pass>.png``. Each
    generated image is then scored against its own sentence embedding plus
    the embeddings returned by ``self.dataset.get_mis_caption``; a hit is
    recorded when the own caption has the highest cosine similarity.

    Once 30000 images are scored, the hits are shuffled, split into 10
    groups of 3000, and the mean and standard deviation of the group
    averages are printed. Sampling then stops.

    Args:
        split_dir: split name; ``'test'`` is remapped to ``'valid'``.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for models is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    def load_on_cpu(path):
        # map_location keeps every tensor on the CPU while loading.
        return torch.load(path, map_location=lambda storage, loc: storage)

    # Build the generator (weights are loaded further down).
    netG = G_DCGAN() if cfg.GAN.B_DCGAN else G_NET()
    netG.apply(weights_init)
    netG.cuda()
    netG.eval()

    # load text encoder
    text_encoder = RNN_ENCODER(self.n_words,
                               nhidden=cfg.TEXT.EMBEDDING_DIM)
    text_encoder.load_state_dict(load_on_cpu(cfg.TRAIN.NET_E))
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    # load image encoder
    image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
    img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder',
                                               'image_encoder')
    image_encoder.load_state_dict(load_on_cpu(img_encoder_path))
    print('Load image encoder from:', img_encoder_path)
    image_encoder = image_encoder.cuda()
    image_encoder.eval()

    batch_size = self.batch_size
    # `volatile=True` has no effect on PyTorch >= 0.4; gradient tracking is
    # disabled with torch.no_grad() around the loop instead.
    noise = Variable(torch.FloatTensor(batch_size, cfg.GAN.Z_DIM)).cuda()

    model_dir = cfg.TRAIN.NET_G
    netG.load_state_dict(load_on_cpu(model_dir))
    print('Load G from: ', model_dir)

    # the path to save generated images
    save_dir = '%s/%s' % (model_dir[:model_dir.rfind('.pth')], split_dir)
    mkdir_p(save_dir)

    num_splits = 10
    split_size = 3000
    num_scored_total = num_splits * split_size
    hits = np.zeros(num_scored_total)
    num_scored = 0
    cnt = 0
    finished = False
    stage = -1  # only the final generator stage is written to disk

    with torch.no_grad():
        for ii in range(11):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
            if finished:
                break
            for step, data in enumerate(self.data_loader, 0):
                cnt += batch_size
                if step % 100 == 0:
                    print('cnt: ', cnt)

                imgs, captions, cap_lens, class_ids, keys = prepare_data(
                    data)

                hidden = text_encoder.init_hidden(batch_size)
                # words_embs: batch_size x nef x seq_len
                # sent_emb: batch_size x nef
                words_embs, sent_emb = text_encoder(
                    captions, cap_lens, hidden)
                words_embs, sent_emb = (words_embs.detach(),
                                        sent_emb.detach())
                mask = (captions == 0)
                num_words = words_embs.size(2)
                if mask.size(1) > num_words:
                    mask = mask[:, :num_words]

                #######################################################
                # (2) Generate fake images
                ######################################################
                noise.data.normal_(0, 1)
                fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs,
                                          mask, cap_lens)
                for j in range(batch_size):
                    s_tmp = '%s/single/%s' % (save_dir, keys[j])
                    folder = s_tmp[:s_tmp.rfind('/')]
                    if not os.path.isdir(folder):
                        mkdir_p(folder)
                    im = fake_imgs[stage][j].data.cpu().numpy()
                    # Affine map sending -1 to 0 and 1 to 255.
                    im = ((im + 1.0) * 127.5).astype(np.uint8)
                    im = np.transpose(im, (1, 2, 0))
                    fullpath = '%s_s%d_%d.png' % (s_tmp, stage, ii)
                    Image.fromarray(im).save(fullpath)

                _, cnn_code = image_encoder(fake_imgs[-1])

                for i in range(batch_size):
                    # Stop at capacity: writing past the end of `hits`
                    # raised IndexError when batch_size did not divide
                    # the total.
                    if num_scored >= num_scored_total:
                        break
                    mis_captions, mis_captions_len = \
                        self.dataset.get_mis_caption(class_ids[i])
                    hidden = text_encoder.init_hidden(99)
                    _, sent_emb_t = text_encoder(
                        mis_captions, mis_captions_len, hidden)
                    # Row 0 is the matching caption; the rest are the
                    # mismatched ones.
                    rnn_code = torch.cat(
                        (sent_emb[i, :].unsqueeze(0), sent_emb_t), 0)
                    # cnn_code = 1 * nef, rnn_code = 100 * nef
                    img_code = cnn_code[i].unsqueeze(0)
                    scores = torch.mm(img_code,
                                      rnn_code.transpose(0, 1))  # 1 * 100
                    cnn_code_norm = torch.norm(img_code, 2, dim=1,
                                               keepdim=True)
                    rnn_code_norm = torch.norm(rnn_code, 2, dim=1,
                                               keepdim=True)
                    norm = torch.mm(cnn_code_norm,
                                    rnn_code_norm.transpose(0, 1))
                    # Cosine similarity, guarded against zero norms.
                    scores0 = scores / norm.clamp(min=1e-8)
                    if torch.argmax(scores0) == 0:
                        hits[num_scored] = 1
                    num_scored += 1

                if num_scored >= num_scored_total:
                    np.random.shuffle(hits)
                    # Each split now averages all 3000 entries; the old
                    # slice ended at `(i + 1) * 3000 - 1` and dropped one.
                    split_means = hits.reshape(
                        num_splits, split_size).mean(axis=1)
                    R_mean = np.average(split_means)
                    R_std = np.std(split_means)
                    print("R mean:{:.4f} std:{:.4f}".format(R_mean, R_std))
                    finished = True
                    break
if cfg.CUDA: torch.cuda.manual_seed_all(args.manualSeed) ########################################################################## now = datetime.datetime.now(dateutil.tz.tzlocal()) timestamp = now.strftime('%Y_%m_%d_%H_%M_%S') isTrainable = 'T' layers = 1 output_dir = '../output/{0}_L{1}_TRX_{2}_{3}_{4}'.format( isTrainable, layers, cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp) model_dir = os.path.join(output_dir, 'Model') image_dir = os.path.join(output_dir, 'Image') metrics_dir = os.path.join(output_dir, 'Metrics') mkdir_p(model_dir) mkdir_p(image_dir) mkdir_p(metrics_dir) torch.cuda.set_device(cfg.GPU_ID) cudnn.benchmark = True tb_dir = '../tensorboard/{0}_L{1}_TRX_{2}_{3}_{4}'.format( isTrainable, layers, cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp) mkdir_p(tb_dir) tbw = SummaryWriter(log_dir=tb_dir) # Tensorboard logging # Get data loader ################################################## imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1)) batch_size = cfg.TRAIN.BATCH_SIZE image_transform = transforms.Compose([
def sampling(self, split_dir):
    """Modify real images with mismatched captions and save the results.

    Loads the text encoder, image encoder, VGG network, DCM network and
    generator, then makes five passes over ``self.data_loader``. In each
    batch the "wrong" captions are encoded, the generator and DCM are run,
    and every DCM output image is written as ``<save_dir>/single_s<n>.png``
    with a running counter ``n``.

    Args:
        split_dir: split name; ``'test'`` is remapped to ``'valid'``.
    """
    if cfg.TRAIN.NET_G == '' or cfg.TRAIN.NET_C == '':
        print('Error: the path for main module or DCM is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    def keep_on_cpu(storage, loc):
        # map_location callback: leave loaded tensors on the CPU.
        return storage

    netG = G_DCGAN() if cfg.GAN.B_DCGAN else G_NET()
    netG.apply(weights_init)
    netG.cuda()
    netG.eval()

    # The text encoder
    text_encoder = RNN_ENCODER(self.n_words,
                               nhidden=cfg.TEXT.EMBEDDING_DIM)
    text_encoder.load_state_dict(
        torch.load(cfg.TRAIN.NET_E, map_location=keep_on_cpu))
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    # The image encoder
    image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
    img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder',
                                               'image_encoder')
    image_encoder.load_state_dict(
        torch.load(img_encoder_path, map_location=keep_on_cpu))
    print('Load image encoder from:', img_encoder_path)
    image_encoder = image_encoder.cuda()
    image_encoder.eval()

    # The VGG network
    VGG = VGGNet()
    print("Load the VGG model")
    VGG.cuda()
    VGG.eval()

    batch_size = self.batch_size
    noise = Variable(torch.FloatTensor(batch_size, cfg.GAN.Z_DIM),
                     volatile=True)
    noise = noise.cuda()

    # The DCM
    netDCM = DCM_Net()
    if cfg.TRAIN.NET_C != '':
        netDCM.load_state_dict(
            torch.load(cfg.TRAIN.NET_C, map_location=keep_on_cpu))
        print('Load DCM from: ', cfg.TRAIN.NET_C)
    netDCM.cuda()
    netDCM.eval()

    model_dir = cfg.TRAIN.NET_G
    netG.load_state_dict(torch.load(model_dir, map_location=keep_on_cpu))
    print('Load G from: ', model_dir)

    # the path to save modified images
    save_dir = '%s/%s' % (model_dir[:model_dir.rfind('.pth')], split_dir)
    mkdir_p(save_dir)

    last_branch = cfg.TREE.BRANCH_NUM - 1
    idx = 0
    for _ in range(5):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
        for step, data in enumerate(self.data_loader, 0):
            if step % 100 == 0:
                print('step: ', step)

            (imgs, captions, cap_lens, class_ids, keys, wrong_caps,
             wrong_caps_len, wrong_cls_id) = prepare_data(data)

            #######################################################
            # (1) Extract text and image embeddings
            ######################################################
            hidden = text_encoder.init_hidden(batch_size)
            words_embs, sent_emb = text_encoder(
                wrong_caps, wrong_caps_len, hidden)
            words_embs = words_embs.detach()
            sent_emb = sent_emb.detach()

            mask = (wrong_caps == 0)
            num_words = words_embs.size(2)
            if mask.size(1) > num_words:
                mask = mask[:, :num_words]

            region_features, cnn_code = image_encoder(imgs[last_branch])

            #######################################################
            # (2) Modify real images
            ######################################################
            noise.data.normal_(0, 1)
            fake_imgs, attention_maps, mu, logvar, h_code, c_code = netG(
                noise, sent_emb, words_embs, mask, cnn_code,
                region_features)

            real_features = VGG(imgs[last_branch])[0]
            fake_img = netDCM(h_code, real_features, sent_emb, words_embs,
                              mask, c_code)

            for j in range(batch_size):
                prefix = '%s/single' % (save_dir)
                parent = prefix[:prefix.rfind('/')]
                if not os.path.isdir(parent):
                    print('Make a new folder: ', parent)
                    mkdir_p(parent)

                pixels = fake_img[j].data.cpu().numpy()
                # Affine map sending -1 to 0 and 1 to 255, as bytes.
                pixels = ((pixels + 1.0) * 127.5).astype(np.uint8)
                pixels = np.transpose(pixels, (1, 2, 0))
                target = '%s_s%d.png' % (prefix, idx)
                idx += 1
                Image.fromarray(pixels).save(target)
# Apply command-line overrides to the global config.
if args.gpu_id == -1:
    cfg.CUDA = False
else:
    cfg.GPU_ID = args.gpu_id

if args.data_dir != '':
    cfg.DATA_DIR = args.data_dir

assert 'real' in cfg.CONFIG_NAME

# Output layout: ../output/<dataset>_<config>/{imgs,models,log}
output_dir = '../output/%s_%s' % \
    (cfg.DATASET_NAME, cfg.CONFIG_NAME)
log_dir = output_dir + '/log'
mkdir_p(output_dir)
mkdir_p(output_dir + '/imgs')
mkdir_p(output_dir + '/models')
mkdir_p(log_dir)
logger = setup_logger(cfg.CONFIG_NAME, log_dir)
logger.info('Using config:')
logger.info(str(cfg))

# The seed is fixed to 100 when not training or when none was supplied.
if not cfg.TRAIN.FLAG:
    args.manualSeed = 100
elif args.manualSeed is None:
    args.manualSeed = 100
    # args.manualSeed = random.randint(1, 10000)
# The message needs a %s placeholder for its argument; without one the
# logging module fails while formatting and the record is never emitted.
logger.info("seed now is : %s", args.manualSeed)
args.manualSeed = random.randint(1, 10000) random.seed(args.manualSeed) np.random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) if cfg.CUDA: torch.cuda.manual_seed_all(args.manualSeed) ########################################################################## now = datetime.datetime.now(dateutil.tz.tzlocal()) timestamp = now.strftime('%Y_%m_%d_%H_%M_%S') output_dir = '../output/%s_%s_%s' % \ (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp) model_dir = os.path.join(output_dir, 'Model') image_dir = os.path.join(output_dir, 'Image') mkdir_p(model_dir) mkdir_p(image_dir) torch.cuda.set_device(cfg.GPU_ID) cudnn.benchmark = True # Get data loader ################################################## imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1)) batch_size = cfg.TRAIN.BATCH_SIZE image_transform = transforms.Compose([ transforms.Resize(int(imsize * 76 / 64)), transforms.RandomCrop(imsize), transforms.RandomHorizontalFlip() ]) dataset = FaceDataset(cfg.DATA_DIR, 'train',
def gen_example(n_words, wordtoix, ixtoword, model_dir):
    '''Generate images (and attention maps) from the sentences in caption.txt.

    Each non-empty line of ``caption.txt`` is tokenized and mapped to
    vocabulary indices. The captions are sorted by decreasing length,
    encoded, and passed through the generator ``image_per_caption`` times.
    Every generator stage is saved as ``results/<i>_<caption>_g<k>.png``
    and every attention visualisation as ``..._a<k>_attention.png``.

    Args:
        n_words: vocabulary size handed to ``RNN_ENCODER``.
        wordtoix: mapping from word to vocabulary index.
        ixtoword: mapping from vocabulary index to word.
        model_dir: path of the generator checkpoint.

    Raises:
        ValueError: if no line of the caption file contains a known word.
    '''
    # filepath = 'example_captions.txt'
    filepath = 'caption.txt'
    with open(filepath, "r") as f:
        sentences = f.read().split('\n')

    # Built once; it does not depend on the sentence.
    tokenizer = RegexpTokenizer(r'\w+')
    captions = []
    cap_lens = []
    for sent in sentences:
        if len(sent) == 0:
            continue
        sent = sent.replace("\ufffd\ufffd", " ")
        tokens = tokenizer.tokenize(sent.lower())
        if len(tokens) == 0:
            print('sentence token == 0 !')
            continue

        rev = []
        for t in tokens:
            t = t.encode('ascii', 'ignore').decode('ascii')
            if len(t) > 0 and t in wordtoix:
                rev.append(wordtoix[t])
        if not rev:
            # A zero-length caption cannot be encoded; skip it rather
            # than fail later inside the text encoder.
            print('sentence has no in-vocabulary word, skipped:', sent)
            continue
        captions.append(rev)
        cap_lens.append(len(rev))

    if not captions:
        raise ValueError('no usable caption found in %s' % filepath)

    # Zero-padded index matrix, rows ordered by decreasing caption length.
    max_len = np.max(cap_lens)
    sorted_indices = np.argsort(cap_lens)[::-1]
    cap_lens = np.asarray(cap_lens)[sorted_indices]
    cap_array = np.zeros((len(captions), max_len), dtype='int64')
    for row, idx in enumerate(sorted_indices):
        cap = captions[idx]
        cap_array[row, :len(cap)] = cap
    data_dic = {0: [cap_array, cap_lens, sorted_indices]}

    text_encoder = RNN_ENCODER(n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder.eval()

    netG = G_NET()
    netG.apply(weights_init)
    # netG.cuda()
    netG.eval()
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)

    save_dir = 'results/'
    mkdir_p(save_dir)

    # Inference only: the whole generation loop runs without autograd.
    with torch.no_grad():
        for key in data_dic:
            captions, cap_lens, sorted_indices = data_dic[key]
            batch_size = captions.shape[0]
            nz = cfg.GAN.Z_DIM
            captions = Variable(torch.from_numpy(captions))
            cap_lens = Variable(torch.from_numpy(cap_lens))

            for i in range(image_per_caption):
                noise = Variable(torch.FloatTensor(batch_size, nz))

                # (1) Extract text embeddings
                hidden = text_encoder.init_hidden(batch_size)
                words_embs, sent_emb = text_encoder(captions, cap_lens,
                                                    hidden)
                mask = (captions == 0)

                # (2) Generate fake images
                noise.data.normal_(0, 1)
                fake_imgs, attention_maps, _, _ = netG(
                    noise, sent_emb, words_embs, mask, cap_lens)
                cap_lens_np = cap_lens.data.numpy()

                for j in range(batch_size):
                    save_name = '%s/%d_%d' % (save_dir, i,
                                              sorted_indices[j])
                    for k in range(len(fake_imgs)):
                        im = fake_imgs[k][j].data.cpu().numpy()
                        # Affine map sending -1 to 0 and 1 to 255.
                        im = ((im + 1.0) * 127.5).astype(np.uint8)
                        im = np.transpose(im, (1, 2, 0))
                        fullpath = '%s_g%d.png' % (save_name, k)
                        Image.fromarray(im).save(fullpath)

                    for k in range(len(attention_maps)):
                        # Attention map k is drawn over image k + 1 when
                        # several stages exist.
                        if len(fake_imgs) > 1:
                            im = fake_imgs[k + 1]
                        else:
                            im = fake_imgs[0]
                        attn_maps = attention_maps[k]
                        att_sze = attn_maps.size(2)
                        img_set, sentences = build_super_images2(
                            im[j].unsqueeze(0),
                            captions[j].unsqueeze(0),
                            [cap_lens_np[j]], ixtoword,
                            [attn_maps[j]], att_sze)
                        if img_set is not None:
                            fullpath = '%s_a%d_attention.png' % (
                                save_name, k)
                            Image.fromarray(img_set).save(fullpath)
def sample2(self, datapath, stage=2):
    """Generate two image outputs per embedding loaded from a NumPy file.

    The stage-I network is loaded when ``stage == 1`` and the stage-II
    network otherwise. Embeddings are processed in batches; both generator
    image outputs are written as ``<save_dir>/<width>_<index>.png`` with a
    1-based index, where ``save_dir`` is ``cfg.NET_G`` cut at its first
    ``'.pth'``.

    Args:
        datapath: path passed to ``np.load``; the first axis of the array
            indexes the embeddings.
        stage: ``1`` selects ``load_network_stageI``; any other value
            selects ``load_network_stageII``.
    """
    load_network = (self.load_network_stageI if stage == 1
                    else self.load_network_stageII)
    netG, _, _ = load_network()
    netG.eval()

    # Text embeddings generated by the encoder.
    embeddings = np.load(datapath)
    num_embeddings = embeddings.shape[0]
    print('Successfully load sentences from: ', datapath)
    print('num_embeddings:', num_embeddings, embeddings.shape)

    # Folder that receives the generated samples.
    save_dir = cfg.NET_G[:cfg.NET_G.find('.pth')]
    mkdir_p(save_dir)

    batch_size = np.minimum(num_embeddings, self.batch_size)
    noise = Variable(torch.FloatTensor(batch_size, cfg.Z_DIM))
    if cfg.CUDA:
        noise = noise.cuda()

    count = 0
    while count < num_embeddings:
        # The last batch may be shorter than batch_size.
        iend = min(count + batch_size, num_embeddings)
        embeddings_batch = embeddings[count:iend]
        num_in_batch = embeddings_batch.shape[0]

        txt_embedding = Variable(torch.FloatTensor(embeddings_batch))
        if cfg.CUDA:
            txt_embedding = txt_embedding.cuda()

        #######################################################
        # (2) Generate fake images
        #######################################################
        noise.data.normal_(0, 1)
        # Trim the noise so it matches a short final batch.
        inputs = (txt_embedding, noise[0:num_in_batch, :])
        lr_fake_imgs, fake_imgs, mu, logvar = \
            nn.parallel.data_parallel(netG, inputs, self.gpus)

        # Second generator output first, then the first one, as before;
        # the width in the file name keeps the two apart.
        for batch_images in (fake_imgs, lr_fake_imgs):
            for i in range(num_in_batch):
                pixels = batch_images[i].data.cpu().numpy()
                # Affine map sending -1 to 0 and 1 to 255, as bytes.
                pixels = ((pixels + 1.0) * 127.5).astype(np.uint8)
                pixels = np.transpose(pixels, (1, 2, 0))
                picture = Image.fromarray(pixels)
                picture.save('%s/%d_%d.png' % (save_dir, picture.size[0],
                                               count + i + 1))
        count += batch_size
def gen_example(self, data_dic):
    """Edit one real image with each caption and save a comparison sheet.

    For every key of ``data_dic`` the captions are encoded and the
    generator is run on the last entry of ``imgs``. ``super.png`` is then
    written to ``<DATA_DIR>/output/<netG>/valid/gen_example/<key>/``, with
    one row per caption: real image | caption text | generated image.

    Args:
        data_dic: mapping ``key -> (captions, cap_lens, sorted_indices,
            imgs)`` where ``captions`` and ``cap_lens`` are NumPy arrays
            and ``imgs[-1]`` is the source image tensor.
    """
    if cfg.TRAIN.NET_G == '' or cfg.TRAIN.NET_C == '':
        print('Error: the path for main module or DCM is not found!')
        return

    def load_on_cpu(path):
        # map_location keeps every tensor on the CPU while loading.
        return torch.load(path, map_location=lambda storage, loc: storage)

    # The text encoder
    text_encoder = RNN_ENCODER(self.n_words,
                               nhidden=cfg.TEXT.EMBEDDING_DIM)
    text_encoder.load_state_dict(load_on_cpu(cfg.TRAIN.NET_E))
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    # The VGG network
    VGG = VGG16()
    print("Load the VGG model")
    VGG.cuda()
    VGG.eval()

    # The main module
    netG = G_DCGAN() if cfg.GAN.B_DCGAN else EncDecNet()
    run_dir = os.path.join(cfg.DATA_DIR, 'output', self.args.netG)
    out_root = os.path.join(run_dir, 'valid/gen_example')
    # NOTE(review): the checkpoint epoch is hard-coded and cfg.TRAIN.NET_G
    # is only checked for emptiness above -- confirm this is intended.
    model_dir = os.path.join(run_dir, 'Model/netG_epoch_8.pth')
    netG.load_state_dict(load_on_cpu(model_dir))
    print('Load G from: ', model_dir)
    netG.cuda()
    netG.eval()

    # `volatile=True` has no effect on PyTorch >= 0.4; gradient tracking is
    # disabled with torch.no_grad() instead.
    with torch.no_grad():
        for key in data_dic:
            save_dir = '%s/%s' % (out_root, key)
            mkdir_p(save_dir)
            captions, cap_lens, sorted_indices, imgs = data_dic[key]

            batch_size = captions.shape[0]
            captions = Variable(torch.from_numpy(captions)).cuda()
            cap_lens = Variable(torch.from_numpy(cap_lens)).cuda()
            noise = Variable(
                torch.FloatTensor(batch_size, cfg.GAN.Z_DIM)).cuda()

            #######################################################
            # (1) Extract text and image embeddings
            ######################################################
            hidden = text_encoder.init_hidden(batch_size)
            words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
            words_embs, sent_emb = words_embs.detach(), sent_emb.detach()
            mask = (captions == 0)

            #######################################################
            # (2) Modify real images
            ######################################################
            noise.data.normal_(0, 1)
            source_img = imgs[-1]
            # Repeat the single source image once per caption for VGG.
            imgs_256 = source_img.unsqueeze(0).repeat(batch_size, 1, 1, 1)
            enc_features = VGG(imgs_256)
            # NOTE(review): the generator receives the unbatched source
            # image while the batched copy only feeds VGG -- confirm.
            fake_img, mu, logvar = nn.parallel.data_parallel(
                netG,
                (source_img, sent_emb, words_embs, noise, mask,
                 enc_features),
                self.gpus)

            # Real image and font are identical for every row: build once.
            real_im = (source_img + 1) * 127.5
            real_im = real_im.cpu().numpy().astype(np.uint8)
            real_im = np.transpose(real_im, (1, 2, 0))
            font = ImageFont.truetype('./FreeMono.ttf', 20)

            rows = []
            for j in range(batch_size):
                canv = Image.new('RGB', (256, 256), (255, 255, 255))
                draw = ImageDraw.Draw(canv)
                words = []
                for k, token in enumerate(captions[j]):
                    if token == 0:  # index 0 marks the end of the caption
                        break
                    word = self.ixtoword[token.item()].encode(
                        'ascii', 'ignore').decode('ascii')
                    if k % 2 == 1:
                        # Break the line after every second word.
                        word = word + '\n'
                    words.append(word)
                draw.text((0, 0), ' '.join(words), font=font,
                          fill=(0, 0, 0))
                canv_np = np.asarray(canv)

                fake_im = (fake_img[j] + 1.0) * 127.5
                fake_im = fake_im.detach().cpu().numpy().astype(np.uint8)
                fake_im = np.transpose(fake_im, (1, 2, 0))

                rows.append(
                    np.concatenate([real_im, canv_np, fake_im], axis=1))

            super_img = Image.fromarray(np.concatenate(rows, axis=0))
            super_img.save(os.path.join(save_dir, 'super.png'))
def sampling(self, split_dir):
    """Modify real images with mismatched captions and save the results.

    Builds the generator (``G_DCGAN`` or ``EncDecNet``), the RNN text
    encoder and a VGG16 network, then makes five passes over
    ``self.data_loader``.  For every batch the generator is conditioned on
    the real image and on the "wrong" caption returned by ``prepare_data``.
    A summary grid produced by ``build_images`` and every generated image
    are written below
    ``<cfg.DATA_DIR>/output/<self.args.netG>/valid/valid_<pass>/``.

    Args:
        split_dir: Not used by this variant; the output location is fixed
            to ``valid``.

    Returns:
        None.  Prints an error and returns early when ``cfg.TRAIN.NET_G``
        is empty.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for main module is not found!')
        return

    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = EncDecNet()
    netG.apply(weights_init)
    netG.cuda()
    netG.eval()

    # The text encoder
    text_encoder = RNN_ENCODER(self.n_words,
                               nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    # The VGG network
    VGG = VGG16()
    print("Load the VGG model")
    VGG.cuda()
    VGG.eval()

    batch_size = self.batch_size
    nz = cfg.GAN.Z_DIM
    # ``volatile=True`` has no effect on PyTorch >= 0.4; gradient tracking
    # is disabled with ``torch.no_grad()`` around the forward passes below.
    noise = Variable(torch.FloatTensor(batch_size, nz))
    noise = noise.cuda()

    # NOTE(review): the checkpoint path is hard-coded to epoch 600 and does
    # not use cfg.TRAIN.NET_G -- confirm this is intended.
    model_dir = os.path.join(cfg.DATA_DIR, 'output', self.args.netG,
                             'Model/netG_epoch_600.pth')
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)

    # the path to save modified images
    save_dir_valid = os.path.join(cfg.DATA_DIR, 'output', self.args.netG,
                                  'valid')

    # Running index over all saved single images; not reset between passes.
    idx = 0
    for i in range(5):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
        save_dir = os.path.join(save_dir_valid, 'valid_%d' % i)
        save_dir_super = os.path.join(save_dir, 'super')
        save_dir_single = os.path.join(save_dir, 'single')
        mkdir_p(save_dir_super)
        mkdir_p(save_dir_single)
        # Prefix of every single-image file; its parent directory is
        # ``save_dir_single``, which was just created above.
        single_prefix = '%s/single' % (save_dir_single)

        for step, data in enumerate(self.data_loader, 0):
            if step % 100 == 0:
                print('step: ', step)

            imgs, w_imgs, captions, cap_lens, class_ids, keys, wrong_caps, \
                wrong_caps_len, wrong_cls_id = prepare_data(data)

            with torch.no_grad():
                #######################################################
                # (1) Extract text and image embeddings
                ######################################################
                hidden = text_encoder.init_hidden(batch_size)
                words_embs, sent_emb = text_encoder(
                    wrong_caps, wrong_caps_len, hidden)
                words_embs, sent_emb = \
                    words_embs.detach(), sent_emb.detach()

                # Mask the padding positions (token id 0) and trim the mask
                # to the number of word embeddings actually produced.
                mask = (wrong_caps == 0)
                num_words = words_embs.size(2)
                if mask.size(1) > num_words:
                    mask = mask[:, :num_words]

                #######################################################
                # (2) Modify real images
                ######################################################
                noise.data.normal_(0, 1)
                fake_img, _, _ = netG(imgs[-1], sent_emb, words_embs,
                                      noise, mask, VGG)

            img_set = build_images(imgs[-1], fake_img, captions,
                                   wrong_caps, self.ixtoword)
            img = Image.fromarray(img_set)
            full_path = '%s/super_step%d.png' % (save_dir_super, step)
            img.save(full_path)

            for j in range(batch_size):
                im = fake_img[j].data.cpu().numpy()
                # NOTE(review): the [-1, 1] --> [0, 255] rescale is disabled
                # in the original, so the raw generator output is cast
                # straight to uint8 -- confirm this is intended.
                # im = (im + 1.0) * 127.5
                im = im.astype(np.uint8)
                im = np.transpose(im, (1, 2, 0))
                im = Image.fromarray(im)
                fullpath = '%s_s%d.png' % (single_prefix, idx)
                idx = idx + 1
                im.save(fullpath)
def sampling(self, split_dir):
    """Generate heat maps for every batch of the data loader and save them.

    Loads ``G_NET`` from ``cfg.TRAIN.NET_G``, runs it on the bounding-box
    maps returned by ``prepare_data`` and, for every sample, writes the
    first generated heat map (min-max scaled to 0..255) to
    ``<model path without .pth>/<split_dir>/single/<key>_<category>.png``.

    Args:
        split_dir: Name of the output sub-directory; ``'test'`` is mapped
            to ``'valid'``.

    Returns:
        None.  Prints an error and returns early when ``cfg.TRAIN.NET_G``
        is empty.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    # Build and load the generator
    netG = G_NET(len(self.cats_index_dict))
    netG.apply(weights_init)
    netG.eval()
    # NOTE(review): the nesting of the multi-GPU branch under the CUDA
    # branch is reconstructed from a whitespace-mangled source -- confirm.
    if cfg.CUDA:
        netG.cuda()
        if len(cfg.GPU_IDS) > 1:
            netG = nn.DataParallel(netG)
            netG.to(self.device)

    batch_size = self.batch_size
    noise = Variable(
        torch.FloatTensor(batch_size, cfg.ROI.BOXES_NUM,
                          len(self.cats_index_dict) * 4))
    # Keep the noise on the same device family as the generator: the
    # generator is only moved to the GPU when cfg.CUDA is set.
    if cfg.CUDA:
        noise = noise.cuda()

    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')]
    save_dir = '%s/%s' % (s_tmp, split_dir)
    mkdir_p(save_dir)

    for step, data in enumerate(self.data_loader, 0):
        if step % 100 == 0:
            print('step: ', step)

        imgs, pooled_hmaps, hmaps, bbox_maps_fwd, bbox_maps_bwd, bbox_fmaps, \
            rois, fm_rois, num_rois, class_ids, keys = prepare_data(data)
        num_rois = num_rois.data.cpu().numpy()

        # Category name of every ROI, per sample.  Column 4 of a ROI row is
        # read as a relative category id and mapped through self.cats_dict.
        cats_list = []
        for batch_index in range(self.batch_size):
            cats = []
            for roi_index in range(num_rois[batch_index]):
                rela_cat_id = int(rois[batch_index, roi_index, 4])
                abs_cat_id = self.cats_dict[rela_cat_id][0]
                cat = self.ixtoword[abs_cat_id].encode(
                    'ascii', 'ignore').decode('ascii')
                cats.append(cat)
            cats_list.append(cats)

        #######################################################
        # (2) Generate fake images
        ######################################################
        max_num_roi = max(num_rois)
        noise.data.normal_(0, 1)
        with torch.no_grad():
            fake_hmaps = netG(noise[:, :max_num_roi], bbox_maps_fwd,
                              bbox_maps_bwd, bbox_fmaps)
        # Replicate along dim 2 three times so each map can be saved as RGB.
        fake_hmaps = fake_hmaps.repeat(1, 1, 3, 1, 1)

        for j in range(batch_size):
            # Only the first ROI (k == 0) of every sample is saved; a sample
            # without any ROI has no category name and is skipped instead of
            # raising IndexError.
            if not cats_list[j]:
                continue
            k = 0

            s_tmp = '%s/single/%s' % (save_dir, keys[j])
            folder = s_tmp[:s_tmp.rfind('/')]
            if not os.path.isdir(folder):
                print('Make a new folder: ', folder)
                mkdir_p(folder)

            im = fake_hmaps[j][k].data.cpu().numpy()
            # Min-max scale to [0, 255]; a constant map would divide by
            # zero, so it is written as an all-zero image instead.
            min_v = im.min()
            value_range = im.max() - min_v
            if value_range > 0:
                im = (im - min_v) / value_range
            else:
                im = np.zeros_like(im)
            im *= 255
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            im = Image.fromarray(im)
            cat = cats_list[j][k]
            fullpath = '{0}_{1}.png'.format(s_tmp, cat)
            im.save(fullpath)
def sampling(self, split_dir):
    """Generate one image per caption of the data loader and save it.

    Loads the generator (``G_DCGAN`` or ``G_NET``) from ``cfg.TRAIN.NET_G``
    and the RNN text encoder from ``cfg.TRAIN.NET_E``, encodes every caption
    batch and writes the last entry of the generator's image list to
    ``<model path without .pth>/<split_dir>/single/<key>_s-1.png``.

    Args:
        split_dir: Name of the output sub-directory; ``'test'`` is mapped
            to ``'valid'``.

    Returns:
        None.  Prints an error and returns early when ``cfg.TRAIN.NET_G``
        is empty.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    # Build and load the generator
    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = G_NET()
    netG.apply(weights_init)
    netG.cuda()
    netG.eval()

    text_encoder = RNN_ENCODER(self.n_words,
                               nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    batch_size = self.batch_size
    nz = cfg.GAN.Z_DIM
    # ``volatile=True`` has no effect on PyTorch >= 0.4; gradient tracking
    # is disabled with ``torch.no_grad()`` around the sampling loop below.
    noise = Variable(torch.FloatTensor(batch_size, nz))
    noise = noise.cuda()

    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    print("LINE==380")
    print("-----------------netG------------------------")
    print(netG)
    print("--------------state-dict---------------------")
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)
    print(
        '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
    )

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')]
    save_dir = '%s/%s' % (s_tmp, split_dir)
    print('save_dir:', save_dir)
    mkdir_p(save_dir)

    with torch.no_grad():
        for step, data in enumerate(self.data_loader, 0):
            if step % 100 == 0:
                print('step: ', step)

            imgs, captions, cap_lens, class_ids, keys = prepare_data(data)

            hidden = text_encoder.init_hidden(batch_size)
            # words_embs: batch_size x nef x seq_len
            # sent_emb: batch_size x nef
            words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
            words_embs, sent_emb = words_embs.detach(), sent_emb.detach()

            # Mask the padding positions (token id 0) and trim the mask to
            # the number of word embeddings actually produced.
            mask = (captions == 0)
            num_words = words_embs.size(2)
            if mask.size(1) > num_words:
                mask = mask[:, :num_words]

            #######################################################
            # (2) Generate fake images
            ######################################################
            noise.data.normal_(0, 1)
            fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs, mask)

            for j in range(batch_size):
                s_tmp = '%s/single/%s' % (save_dir, keys[j])
                folder = s_tmp[:s_tmp.rfind('/')]
                if not os.path.isdir(folder):
                    print('Make a new folder: ', folder)
                    mkdir_p(folder)

                # Only the last entry of the generator's image list is
                # saved; k also ends up in the file name ("_s-1").
                k = -1
                im = fake_imgs[k][j].data.cpu().numpy()
                # [-1, 1] --> [0, 255]
                im = (im + 1.0) * 127.5
                im = im.astype(np.uint8)
                im = np.transpose(im, (1, 2, 0))
                im = Image.fromarray(im)
                fullpath = '%s_s%d.png' % (s_tmp, k)
                im.save(fullpath)
def gen_example(self, data_dic):
    """Generate and save example images for user-supplied captions.

    Loads the text encoder, image encoder, VGG network, main generator and
    the DCM network, then for every entry of ``data_dic`` runs the
    generator followed by the DCM and writes, below
    ``<cfg.TRAIN.NET_G without .pth>/<key>/``:

    * ``<i>_s_<idx>_g<k>.png``  -- every image returned by the generator,
    * ``<i>_s_<idx>_a<k>.png``  -- attention visualisations from
      ``build_super_images2`` (skipped when it returns ``None``),
    * ``1_sf_<idx>_SF.png``     -- the DCM output,
    * ``1_s_9_SR.png``          -- the input image ``imgs[2]``.

    Args:
        data_dic: Mapping from a key to a tuple
            ``(captions, cap_lens, sorted_indices, imgs)``; ``captions``
            and ``cap_lens`` are NumPy arrays (they are passed to
            ``torch.from_numpy``).

    Returns:
        None.  Prints an error and returns early when ``cfg.TRAIN.NET_G``
        or ``cfg.TRAIN.NET_C`` is empty.
    """
    if cfg.TRAIN.NET_G == '' or cfg.TRAIN.NET_C == '':
        print('Error: the path for main module or DCM is not found!')
        return

    def _to_pil(tensor):
        # Convert a CHW tensor in [-1, 1] to an HWC uint8 PIL image.
        arr = tensor.data.cpu().numpy()
        arr = (arr + 1.0) * 127.5
        arr = arr.astype(np.uint8)
        arr = np.transpose(arr, (1, 2, 0))
        return Image.fromarray(arr)

    # The text encoder
    text_encoder = \
        RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    # The image encoder
    image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
    img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder',
                                               'image_encoder')
    state_dict = torch.load(img_encoder_path,
                            map_location=lambda storage, loc: storage)
    image_encoder.load_state_dict(state_dict)
    print('Load image encoder from:', img_encoder_path)
    image_encoder = image_encoder.cuda()
    image_encoder.eval()

    # The VGG network
    VGG = VGGNet()
    print("Load the VGG model")
    VGG.cuda()
    VGG.eval()

    # The main module
    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = G_NET()
    s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)
    netG.cuda()
    netG.eval()

    # The DCM.  cfg.TRAIN.NET_C is known to be non-empty here because of
    # the guard at the top of the function.
    netDCM = DCM_Net()
    state_dict = torch.load(cfg.TRAIN.NET_C,
                            map_location=lambda storage, loc: storage)
    netDCM.load_state_dict(state_dict)
    print('Load DCM from: ', cfg.TRAIN.NET_C)
    netDCM.cuda()
    netDCM.eval()

    for key in data_dic:
        save_dir = '%s/%s' % (s_tmp, key)
        mkdir_p(save_dir)
        captions, cap_lens, sorted_indices, imgs = data_dic[key]

        batch_size = captions.shape[0]
        nz = cfg.GAN.Z_DIM
        # ``volatile=True`` has no effect on PyTorch >= 0.4; gradient
        # tracking is disabled with ``torch.no_grad()`` below instead.
        captions = Variable(torch.from_numpy(captions))
        cap_lens = Variable(torch.from_numpy(cap_lens))
        captions = captions.cuda()
        cap_lens = cap_lens.cuda()

        for i in range(1):
            noise = Variable(torch.FloatTensor(batch_size, nz))
            noise = noise.cuda()

            with torch.no_grad():
                #######################################################
                # (1) Extract text and image embeddings
                ######################################################
                hidden = text_encoder.init_hidden(batch_size)
                # The text embeddings
                words_embs, sent_emb = text_encoder(
                    captions, cap_lens, hidden)
                # The image embeddings
                region_features, cnn_code = image_encoder(
                    imgs[cfg.TREE.BRANCH_NUM - 1].unsqueeze(0))
                mask = (captions == 0)

                #######################################################
                # (2) Modify real images
                ######################################################
                noise.data.normal_(0, 1)
                fake_imgs, attention_maps, mu, logvar, h_code, c_code = \
                    netG(noise, sent_emb, words_embs, mask, cnn_code,
                         region_features)

                real_img = imgs[cfg.TREE.BRANCH_NUM - 1].unsqueeze(0)
                real_features = VGG(real_img)[0]

                fake_img = netDCM(h_code, real_features, sent_emb,
                                  words_embs, mask, c_code)

            cap_lens_np = cap_lens.cpu().data.numpy()

            for j in range(batch_size):
                save_name = '%s/%d_s_%d' % (save_dir, i, sorted_indices[j])

                # Every image returned by the main generator.
                for k in range(len(fake_imgs)):
                    fullpath = '%s_g%d.png' % (save_name, k)
                    _to_pil(fake_imgs[k][j]).save(fullpath)

                # Attention visualisations: attention map k is paired with
                # image k + 1 when the generator returned more than one.
                for k in range(len(attention_maps)):
                    if len(fake_imgs) > 1:
                        im = fake_imgs[k + 1].detach().cpu()
                    else:
                        im = fake_imgs[0].detach().cpu()
                    attn_maps = attention_maps[k]
                    att_sze = attn_maps.size(2)
                    img_set, sentences = \
                        build_super_images2(im[j].unsqueeze(0),
                                            captions[j].unsqueeze(0),
                                            [cap_lens_np[j]], self.ixtoword,
                                            [attn_maps[j]], att_sze)
                    if img_set is not None:
                        im = Image.fromarray(img_set)
                        fullpath = '%s_a%d.png' % (save_name, k)
                        im.save(fullpath)

                # The DCM output.
                save_name = '%s/%d_sf_%d' % (save_dir, 1, sorted_indices[j])
                fullpath = '%s_SF.png' % (save_name)
                _to_pil(fake_img[j]).save(fullpath)

            # The input image.
            # NOTE(review): index 2 is hard-coded here while the encoders
            # above use cfg.TREE.BRANCH_NUM - 1 -- confirm they agree.
            save_name = '%s/%d_s_%d' % (save_dir, 1, 9)
            fullpath = '%s_SR.png' % (save_name)
            _to_pil(imgs[2]).save(fullpath)
def sampling(self, split_dir, model):
    """Generate one image per sample, optionally with CLIP sentence codes.

    Loads the generator from ``cfg.TRAIN.NET_G`` and the RNN text encoder
    from ``cfg.TRAIN.NET_E``.  When ``cfg.TRAIN.CLIP_SENTENCODER`` is set,
    the sentence embedding is replaced by ``model.encode_text`` of one
    randomly chosen sentence per sample, and the chosen sentences are
    appended to ``<save_dir>/eval_sents.txt``.  The generated image is
    written to ``<save_dir>/fake/<key>_s-1.png``; the matching real image
    and text file are copied from ``../data/Face/`` into
    ``<save_dir>/real/`` and ``<save_dir>/text/``.

    Args:
        split_dir: Name of the output sub-directory; ``'test'`` is mapped
            to ``'valid'``.
        model: Object providing ``encode_text``; only used when
            ``cfg.TRAIN.CLIP_SENTENCODER`` is set.

    Returns:
        None.  Prints an error and returns early when ``cfg.TRAIN.NET_G``
        is empty.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'
    use_gpu = cfg.GPU_ID != -1

    # Build and load the generator
    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = G_NET()
    netG.apply(weights_init)
    if use_gpu:
        netG.cuda()
    netG.eval()

    text_encoder = RNN_ENCODER(self.n_words,
                               nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    if use_gpu:
        text_encoder = text_encoder.cuda()
    text_encoder.eval()

    batch_size = self.batch_size
    nz = cfg.GAN.Z_DIM
    noise = Variable(torch.FloatTensor(batch_size, nz))
    if use_gpu:
        noise = noise.cuda()

    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')]
    save_dir = '%s/%s' % (s_tmp, split_dir)
    mkdir_p(save_dir)
    # Destinations of the copied reference files.  They are created up
    # front so the copies below never depend on whether the fake-image
    # folder already existed.
    for extra_dir in (f'{save_dir}/real', f'{save_dir}/text'):
        print('Make a new folder: ', extra_dir)
        mkdir_p(extra_dir)

    if cfg.TRAIN.CLIP_SENTENCODER:
        print("Use CLIP SentEncoder for sampling")

    # The whole sampling loop is inference only, so gradient tracking is
    # switched off for the encoder and generator forward passes.
    with torch.no_grad():
        for step, data in enumerate(self.data_loader, 0):
            if step % 100 == 0:
                print('step: ', step)

            imgs, captions, cap_lens, class_ids, keys, texts = \
                prepare_data(data)

            hidden = text_encoder.init_hidden(batch_size)
            # words_embs: batch_size x nef x seq_len
            # sent_emb: batch_size x nef
            words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
            words_embs, sent_emb = words_embs.detach(), sent_emb.detach()

            # Mask the padding positions (token id 0) and trim the mask to
            # the number of word embeddings actually produced.
            mask = (captions == 0)
            num_words = words_embs.size(2)
            if mask.size(1) > num_words:
                mask = mask[:, :num_words]

            if cfg.TRAIN.CLIP_SENTENCODER:
                # randomly select one sentence for each example
                sents = []
                for idx in range(len(texts)):
                    sents_per_image = texts[idx].split('\n')
                    if len(sents_per_image) > 1:
                        # The upper bound of randint is exclusive, so the
                        # last split element is never chosen -- presumably
                        # this skips the empty string left by a trailing
                        # newline; verify against the text files.
                        sent_ix = np.random.randint(
                            0, len(sents_per_image) - 1)
                    else:
                        sent_ix = 0
                    sents.append(sents_per_image[sent_ix])
                    with open('%s/%s' % (save_dir, 'eval_sents.txt'),
                              'a+') as f:
                        f.write(sents_per_image[sent_ix] + '\n')

                sent = clip.tokenize(sents)
                sent_input = sent
                if use_gpu:
                    sent_input = sent.cuda()
                sent_emb = model.encode_text(sent_input).float()

            #######################################################
            # (2) Generate fake images
            ######################################################
            noise.data.normal_(0, 1)
            fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs, mask)

            for j in range(batch_size):
                s_tmp = '%s/fake/%s' % (save_dir, keys[j])
                folder = s_tmp[:s_tmp.rfind('/')]
                if not os.path.isdir(folder):
                    print('Make a new folder: ', folder)
                    mkdir_p(folder)

                # Only the last entry of the generator's image list is
                # saved; k also ends up in the file name ("_s-1").
                k = -1
                im = fake_imgs[k][j].data.cpu().numpy()
                # [-1, 1] --> [0, 255]
                im = (im + 1.0) * 127.5
                im = im.astype(np.uint8)
                im = np.transpose(im, (1, 2, 0))
                im = Image.fromarray(im)
                fullpath = '%s_s%d.png' % (s_tmp, k)
                im.save(fullpath)

                # Strip a bytes-repr wrapper (b'...') from the key.  Only
                # the leading "b" of such a wrapper is dropped; removing
                # every "b" would corrupt keys that contain that letter.
                temp = keys[j]
                if temp[:2] in ("b'", 'b"'):
                    temp = temp[2:-1]
                temp = temp.replace("'", '')
                shutil.copy(f"../data/Face/images/{temp}.jpg",
                            f"{save_dir}/real/")
                shutil.copy(f"../data/Face/text/{temp}.txt",
                            f"{save_dir}/text/")
def sample(self, split_dir, num_samples=25, draw_bbox=False):
    """Save image grids: one real image followed by nine generated ones.

    For up to ``num_samples`` batches of ``self.data_loader`` the first
    sample of the batch is used: its caption embedding, transformation
    matrices and one-hot labels are repeated nine times and fed to the
    generator with nine independent noise vectors.  The real image and the
    nine results are stored as a single row in
    ``<model path without .pth>_<split_dir>/<caption>_<step>.png``.

    Args:
        split_dir: Suffix of the output directory; ``'test'`` is mapped to
            ``'valid'``.
        num_samples: Maximum number of grids to write.
        draw_bbox: When true, outlines of up to three bounding boxes of the
            first sample are drawn onto every image of the grid.

    Returns:
        None.  Prints an error and returns early when ``cfg.TRAIN.NET_G``
        is empty.
    """
    # Imported locally, as in the original.  The Python-2-only ``cPickle``
    # import and the other unused local imports were dropped.
    import torchvision.utils as vutils

    if cfg.TRAIN.NET_G == '':
        print('Error: the path for model NET_G is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    # Build and load the text encoder
    text_encoder = RNN_ENCODER(self.n_words,
                               nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    batch_size = cfg.TRAIN.BATCH_SIZE
    nz = cfg.GAN.Z_DIM

    # Build and load the generator; the checkpoint stores its weights
    # under the "netG" key.
    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG = G_NET()
    print('Load G from: ', model_dir)
    netG.apply(weights_init)
    netG.load_state_dict(state_dict["netG"])
    netG.cuda()
    netG.eval()

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')]
    save_dir = '%s_%s' % (s_tmp, split_dir)
    mkdir_p(save_dir)

    num_variants = 9              # generated images per grid
    grid_size = num_variants + 1  # plus the real image in slot 0
    imsize = 256
    last_px = imsize - 1
    noise = Variable(torch.FloatTensor(num_variants, nz))

    num_saved = 0
    for step, data in enumerate(self.data_loader, 0):
        if step >= num_samples:
            break

        imgs, captions, cap_lens, class_ids, keys, transformation_matrices, \
            label_one_hot, bbox = prepare_data(data, eval=True)

        # Everything below uses only the first sample of the batch.
        transf_matrices_inv = transformation_matrices[1][0].unsqueeze(0)
        label_one_hot = label_one_hot[0].unsqueeze(0)

        img = imgs[-1][0]
        val_image = img.view(1, 3, imsize, imsize)

        hidden = text_encoder.init_hidden(batch_size)
        # words_embs: batch_size x nef x seq_len
        # sent_emb: batch_size x nef
        words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
        words_embs = words_embs[0].unsqueeze(0).detach()
        sent_emb = sent_emb[0].unsqueeze(0).detach()
        words_embs = words_embs.repeat(num_variants, 1, 1)
        sent_emb = sent_emb.repeat(num_variants, 1)

        # Mask the padding positions (token id 0) of the first caption and
        # trim the mask to the number of word embeddings.
        mask = (captions == 0)
        mask = mask[0].unsqueeze(0)
        num_words = words_embs.size(2)
        if mask.size(1) > num_words:
            mask = mask[:, :num_words]
        mask = mask.repeat(num_variants, 1)

        transf_matrices_inv = transf_matrices_inv.repeat(
            num_variants, 1, 1, 1)
        label_one_hot = label_one_hot.repeat(num_variants, 1, 1)

        #######################################################
        # (2) Generate fake images
        ######################################################
        noise.data.normal_(0, 1)
        inputs = (noise, sent_emb, words_embs, mask, transf_matrices_inv,
                  label_one_hot)
        with torch.no_grad():
            fake_imgs, _, mu, logvar = nn.parallel.data_parallel(
                netG, inputs, self.gpus)

        data_img = torch.FloatTensor(grid_size, 3, imsize, imsize).fill_(0)
        data_img[0] = val_image
        data_img[1:grid_size] = fake_imgs[-1]

        if draw_bbox:
            for box_idx in range(3):
                x, y, w, h = (int(imsize * coord)
                              for coord in bbox[0, box_idx])
                w = min(w, last_px)
                h = min(h, last_px)
                # A negative x stops the loop -- presumably it marks a
                # padding entry; verify against the data loader.
                if x <= -1:
                    break
                # Clamp the border coordinates to the last valid pixel: a
                # box touching the right or bottom edge gives x + w (or
                # y + h) == imsize, which is not a valid index.
                left = min(x, last_px)
                top = min(y, last_px)
                right = min(x + w, last_px)
                bottom = min(y + h, last_px)
                data_img[:grid_size, :, top, x:x + w] = 1
                data_img[:grid_size, :, y:y + h, left] = 1
                data_img[:grid_size, :, bottom, x:x + w] = 1
                data_img[:grid_size, :, y:y + h, right] = 1

        # Rebuild the caption text (token id 0 ends the caption).
        cap = captions[0].data.cpu().numpy()
        words = []
        for token in cap:
            if token == 0:
                break
            words.append(self.ixtoword[token].encode(
                'ascii', 'ignore').decode('ascii'))
        sentence = " ".join(words)

        vutils.save_image(data_img,
                          '{}/{}_{}.png'.format(save_dir, sentence, step),
                          normalize=True, nrow=grid_size)
        num_saved += 1

    # Report the number of grids actually written; the loop index alone is
    # off by one when the loader runs out before num_samples is reached.
    print("Saved {} files to {}".format(num_saved, save_dir))
def sampling(self, split_dir):
    """Generate images for human and imperfect captions and report DDVA.

    For every batch of ``self.data_loader`` the generator is run twice:
    once on the human captions and once on the imperfect captions.  Both
    results are aligned with ``sort_by_keys``, the image generated from the
    human caption is saved as
    ``<model path without .pth>/<split_dir>/single_s<idx>.png``, and the
    negated value returned by ``negative_ddva`` is accumulated.  The
    average over all batches is printed at the end.

    Args:
        split_dir: Name of the output sub-directory; ``'test'`` is mapped
            to ``'valid'``.

    Returns:
        None.  Prints an error and returns early when ``cfg.TRAIN.NET_G``
        is empty.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for models is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = G_NET()
    netG.apply(weights_init)
    netG.cuda()
    netG.eval()

    text_encoder = RNN_ENCODER(self.n_words,
                               nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    batch_size = self.batch_size
    nz = cfg.GAN.Z_DIM
    # ``volatile=True`` has no effect on PyTorch >= 0.4; gradient tracking
    # is disabled with ``torch.no_grad()`` around the sampling loop below.
    noise = Variable(torch.FloatTensor(batch_size, nz))
    noise = noise.cuda()

    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')]
    save_dir = '%s/%s' % (s_tmp, split_dir)
    mkdir_p(save_dir)
    # Prefix of every saved image; its parent directory is ``save_dir``,
    # which was just created above.
    single_prefix = '%s/single' % (save_dir)

    idx = 0
    avg_ddva = 0
    with torch.no_grad():
        for step, data in enumerate(self.data_loader, 0):
            if step % 100 == 0:
                print('step: ', step)

            captions, cap_lens, imperfect_captions, imperfect_cap_lens, \
                misc = data

            # Generate images for human-text ---------------------------
            data_human = [captions, cap_lens, misc]
            imgs, captions, cap_lens, class_ids, keys, wrong_caps, \
                wrong_caps_len, wrong_cls_id = prepare_data(data_human)

            hidden = text_encoder.init_hidden(batch_size)
            words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
            words_embs, sent_emb = words_embs.detach(), sent_emb.detach()

            mask = (captions == 0)
            num_words = words_embs.size(2)
            if mask.size(1) > num_words:
                mask = mask[:, :num_words]

            noise.data.normal_(0, 1)
            fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs, mask)

            # Generate images for imperfect caption-text ---------------
            data_imperfect = [imperfect_captions, imperfect_cap_lens, misc]
            imgs, imperfect_captions, imperfect_cap_lens, class_ids, \
                imperfect_keys, wrong_caps, wrong_caps_len, wrong_cls_id = \
                prepare_data(data_imperfect)

            hidden = text_encoder.init_hidden(batch_size)
            words_embs, sent_emb = text_encoder(
                imperfect_captions, imperfect_cap_lens, hidden)
            words_embs, sent_emb = words_embs.detach(), sent_emb.detach()

            mask = (imperfect_captions == 0)
            num_words = words_embs.size(2)
            if mask.size(1) > num_words:
                mask = mask[:, :num_words]

            noise.data.normal_(0, 1)
            imperfect_fake_imgs, _, _, _ = netG(
                noise, sent_emb, words_embs, mask)

            # Sort the results by keys to align ------------------------
            keys, captions, cap_lens, fake_imgs, _, _ = sort_by_keys(
                keys, captions, cap_lens, fake_imgs, None, None)

            imperfect_keys, imperfect_captions, imperfect_cap_lens, \
                imperfect_fake_imgs, true_imgs, _ = sort_by_keys(
                    imperfect_keys, imperfect_captions, imperfect_cap_lens,
                    imperfect_fake_imgs, imgs, None)

            # Shift device for the imgs, target_imgs and imperfect_imgs
            for i in range(len(imgs)):
                imgs[i] = imgs[i].to(secondary_device)
                imperfect_fake_imgs[i] = imperfect_fake_imgs[i].to(
                    secondary_device)
                fake_imgs[i] = fake_imgs[i].to(secondary_device)

            for j in range(batch_size):
                k = -1
                im = fake_imgs[k][j].data.cpu().numpy()
                im = (im + 1.0) * 127.5
                im = im.astype(np.uint8)
                im = np.transpose(im, (1, 2, 0))

                # The imperfect-caption and true images are converted but
                # only written when the save lines below are re-enabled.
                cap_im = imperfect_fake_imgs[k][j].data.cpu().numpy()
                cap_im = (cap_im + 1.0) * 127.5
                cap_im = cap_im.astype(np.uint8)
                cap_im = np.transpose(cap_im, (1, 2, 0))

                true_im = true_imgs[k][j].data.cpu().numpy()
                true_im = (true_im + 1.0) * 127.5
                true_im = true_im.astype(np.uint8)
                true_im = np.transpose(true_im, (1, 2, 0))

                # Uncomment to save images.
                # true_im = Image.fromarray(true_im)
                # fullpath = '%s_true_s%d.png' % (single_prefix, idx)
                # true_im.save(fullpath)
                im = Image.fromarray(im)
                fullpath = '%s_s%d.png' % (single_prefix, idx)
                im.save(fullpath)
                # cap_im = Image.fromarray(cap_im)
                # fullpath = '%s_imperfect_s%d.png' % (single_prefix, idx)
                # cap_im.save(fullpath)
                idx = idx + 1

            # NOTE(review): ``imgs`` (not the key-sorted ``true_imgs``) is
            # passed here together with the sorted generator outputs --
            # confirm that sort_by_keys leaves them aligned.
            neg_ddva = negative_ddva(
                imperfect_fake_imgs, imgs, fake_imgs,
                reduce='mean', final_only=True).data.cpu().numpy()
            avg_ddva += neg_ddva * (-1)

            print(step)

    # Average over the number of processed batches.
    avg_ddva = avg_ddva / (step + 1)
    print('\n\nAvg_DDVA: ', avg_ddva)
def save_img_results(self,
                     netsG,
                     noise,
                     atts,
                     image_atts,
                     inception_model,
                     classifiers,
                     real_imgs,
                     gen_iterations,
                     name='current'):
    """Run the generator stages and save the outputs as one image.

    When ``self.args.kl_loss`` is false, up to three generator stages
    (bounded by ``cfg.TREE.BRANCH_NUM``) are run through
    ``nn.parallel.data_parallel``; attribute embeddings from
    ``classifier_loss`` feed the next stage.  The collected images are
    passed to ``build_images`` and the result is written to
    ``<self.image_dir>/G_<gen_iterations>.png``.

    Args:
        netsG: Sequence of generator stages.
        noise: Noise input of the first stage.
        atts: Attribute input of the first stage.
        image_atts: Image attributes, passed to the first stage and to
            ``classifier_loss``.
        inception_model: Passed through to ``classifier_loss``.
        classifiers: Passed through to ``classifier_loss``.
        real_imgs: Real images per stage; only read when
            ``self.args.split == 'train'``.
        gen_iterations: Value used in the output file name.
        name: Not used.
    """
    mkdir_p(self.image_dir)

    # Save images
    # (The original carried a disabled code path here that called a single
    # netG directly, with or without the KL loss.)
    generated = []
    C_losses = None

    if not self.args.kl_loss:
        num_stages = cfg.TREE.BRANCH_NUM
        if num_stages > 0:
            is_train = self.args.split == 'train'

            # Stage 1
            stage1_img, h_code1 = nn.parallel.data_parallel(
                netsG[0], (noise, atts, image_atts), self.gpus)
            generated.append(stage1_img)
            if is_train:
                # For train: use the features of real_imgs.
                att_embeddings, C_losses = classifier_loss(
                    classifiers, inception_model, real_imgs[0], image_atts,
                    C_losses)
                _, C_losses = classifier_loss(
                    classifiers, inception_model, stage1_img, image_atts,
                    C_losses)
            else:
                # Original comment (translated): "not needed".
                att_embeddings, _ = classifier_loss(
                    classifiers, inception_model, stage1_img, image_atts)

            if num_stages > 1:
                # Stage 2
                stage2_img, h_code2 = nn.parallel.data_parallel(
                    netsG[1], (h_code1, att_embeddings), self.gpus)
                generated.append(stage2_img)
                # NOTE(review): both branches below pass the stage-1 image
                # again rather than the stage-2 image -- looks like a
                # copy/paste leftover; confirm against the training code.
                if is_train:
                    att_embeddings, C_losses = classifier_loss(
                        classifiers, inception_model, real_imgs[1],
                        image_atts, C_losses)
                    _, C_losses = classifier_loss(
                        classifiers, inception_model, stage1_img,
                        image_atts, C_losses)
                else:
                    att_embeddings, _ = classifier_loss(
                        classifiers, inception_model, stage1_img,
                        image_atts)

                if num_stages > 2:
                    # Stage 3
                    stage3_img = nn.parallel.data_parallel(
                        netsG[2], (h_code2, att_embeddings), self.gpus)
                    generated.append(stage3_img)

    # make image set
    grid = build_images(generated)
    out_path = '%s/G_%s.png' % (self.image_dir, gen_iterations)
    Image.fromarray(grid).save(out_path)
def __init__(self,
             output_dir,
             cap_model,
             vocab,
             eval_utils,
             my_resnet,
             word2idx,
             glove_model,
             idx2word,
             vocab_cap=None,
             eval_kwargs=None):
    """Create the output directories and build the caption text models.

    Args:
        output_dir: Root directory; ``Model``, ``Image``, ``test`` and
            ``Log`` sub-directories are created below it.
        cap_model: Accepted but not stored (``self.cap_model`` is set to
            ``None``).
        vocab: Stored as ``self.vocab``.
        eval_utils: Stored as ``self.eval_utils``.
        my_resnet: Stored as ``self.my_resnet``.
        word2idx: Word-to-index mapping, stored and passed to
            ``UniSKIP_variant``.
        glove_model: Sized container of embeddings; its length is used as
            the vocabulary size.
        idx2word: Index-to-word mapping, stored and passed to
            ``UniSKIP_variant``.
        vocab_cap: Stored as ``self.vocab_cap``.
        eval_kwargs: Dict that must contain the keys ``'new_arch'`` and
            ``'cap_flag'``.  Defaults to a fresh empty dict.

    Raises:
        KeyError: If ``eval_kwargs`` lacks ``'new_arch'`` or ``'cap_flag'``
            (this includes the default).
    """
    # A literal ``{}`` default would be shared between all instances that
    # rely on it; create a fresh dict per call instead.
    if eval_kwargs is None:
        eval_kwargs = {}

    # if cfg.TRAIN.FLAG:
    self.model_dir = os.path.join(output_dir, 'Model')
    self.image_dir = os.path.join(output_dir, 'Image')
    self.test_img_dir = os.path.join(output_dir, 'test')
    self.log_dir = os.path.join(output_dir, 'Log')
    mkdir_p(self.model_dir)
    mkdir_p(self.image_dir)
    mkdir_p(self.log_dir)
    mkdir_p(self.test_img_dir)
    # self.summary_writer = FileWriter(self.log_dir)

    # NOTE(review): the ``cap_model`` argument is ignored here -- confirm
    # this is intended.
    self.cap_model = None
    self.max_epoch = cfg.TRAIN.MAX_EPOCH
    self.snapshot_interval = 5

    # cfg.GPU_ID is a comma-separated list of device ids, or None.
    self.gpus = None
    self.num_gpus = 1
    if cfg.GPU_ID is not None:
        s_gpus = cfg.GPU_ID.split(',')
        self.gpus = [int(ix) for ix in s_gpus]
        self.num_gpus = len(self.gpus)
    self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
    if cfg.CUDA:
        # Only select a device when one was configured; indexing
        # ``self.gpus`` while it is None would raise TypeError.
        if self.gpus:
            torch.cuda.set_device(self.gpus[0])
        cudnn.benchmark = True

    self.vocab = vocab
    self.eval_kwargs = eval_kwargs
    self.eval_utils = eval_utils
    self.my_resnet = my_resnet
    self.text_dim = cfg.TEXT.DIMENSION
    self.word2idx = word2idx
    self.idx2word = idx2word
    self.glove_model = glove_model
    print("Length of glove model", len(glove_model))

    # The encoder and decoder differ between the two configurations only
    # in their third argument: cfg.GAN.CONDITION_DIM without the caption
    # model, 512 with it.
    thought_size = 512 if cfg.CAP.USE else cfg.GAN.CONDITION_DIM
    # Original argument notes, kept as written (their exact placement is
    # ambiguous in the source):
    #   ##layers, hidden size, bidirectional, emb_dim
    #   ##hidden_size, embedding_dim, thought_size, vocab_size
    self.CTencoder = STEncoder(cfg.TEXT.HIDDENSTATE, 1, thought_size,
                               True, 128)
    self.CTdecoder = STDuoDecoderAttn(256, cfg.TEXT.HIDDENSTATE,
                                      thought_size, len(glove_model))

    # ##encoder_model, decoder_model, embedding_dim, vocab_size
    self.CTallmodel = UniSKIP_variant(self.CTencoder, self.CTdecoder,
                                      self.text_dim, len(self.glove_model),
                                      glove_model, word2idx, idx2word)

    # Loss functions
    self.cosEmbLoss = nn.CosineEmbeddingLoss()
    self.CTloss = nn.CrossEntropyLoss()
    if cfg.CUDA:
        self.cosEmbLoss = self.cosEmbLoss.cuda()
        self.CTloss = self.CTloss.cuda()
        self.CTencoder = self.CTencoder.cuda()
        self.CTdecoder = self.CTdecoder.cuda()
        self.CTallmodel = self.CTallmodel.cuda()

    self.new_arch = eval_kwargs['new_arch']
    self.cap_model_bool = eval_kwargs['cap_flag']
    self.vocab_cap = vocab_cap
def gen_samples(self, idx):
    """Generate ten samples per caption and save images and attention maps.

    Loads the text encoder and the generator, then for
    ``self.num_batches`` batches of ``self.data_loader`` draws ten noise
    vectors per caption.  Below
    ``<cfg.TRAIN.NET_G without .pth>/samples/`` it writes:

    * ``<i>_s_<n>_g<k>.png`` -- every image returned by the generator,
    * ``<i>_s_<n>_a<k>.png`` -- attention visualisations from
      ``build_super_images2`` (skipped when it returns ``None``),
    * ``test128_<orig+1>_<n>.png`` -- a copy of the reference image.

    Args:
        idx: Indexable mapping from a sample position ``n`` to the index of
            its reference image (``idx[n] + 1`` is used in the source file
            name).

    Raises:
        StopIteration: If the data loader yields fewer than
            ``self.num_batches`` batches.
    """
    text_encoder = RNN_ENCODER(self.n_words,
                               nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from: {}'.format(cfg.TRAIN.NET_E))
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    netG = G_NET()
    state_dict = torch.load(cfg.TRAIN.NET_G,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: {}'.format(cfg.TRAIN.NET_G))
    netG.cuda()
    netG.eval()

    s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
    save_dir = '%s/samples' % (s_tmp)
    mkdir_p(save_dir)

    batch_size = self.batch_size
    nz = cfg.GAN.Z_DIM
    noise = Variable(torch.FloatTensor(batch_size, nz))
    noise = noise.cuda()

    data_iter = iter(self.data_loader)
    for step in range(self.num_batches):
        # The builtin next() works on every iterator; the ``.next()``
        # method is a Python 2 idiom that current loaders do not provide.
        data = next(data_iter)
        imgs, captions, cap_lens, class_ids, sorted_cap_indices = \
            self.prepare_data(data)

        # Inference only: run the encoder and the generator without
        # gradient tracking.
        with torch.no_grad():
            hidden = text_encoder.init_hidden(batch_size)
            words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)

        # Mask the padding positions (token id 0) and trim the mask to the
        # number of word embeddings actually produced.
        mask = (captions == 0)
        num_words = words_embs.size(2)
        if mask.size(1) > num_words:
            mask = mask[:, :num_words]

        cap_lens_np = cap_lens.cpu().data.numpy()

        for i in range(10):
            noise.data.normal_(0, 1)
            with torch.no_grad():
                fake_imgs, attention_maps, _, _ = netG(
                    noise, sent_emb, words_embs, mask)

            for j in range(batch_size):
                right_idx = step * batch_size + sorted_cap_indices[j]
                save_name = '%s/%d_s_%d' % (save_dir, i, right_idx)

                # The reference image does not depend on the noise draw,
                # so it is copied once per sample instead of ten times.
                if i == 0:
                    original_idx = idx[right_idx]
                    shutil.copyfile(
                        '/.local/AttnGAN/data/FashionSynthesis/test/original/test128_{}.png'
                        .format(original_idx + 1),
                        save_dir + '/test128_{0}_{1}.png'.format(
                            original_idx + 1, right_idx))

                for k in range(len(fake_imgs)):
                    im = fake_imgs[k][j].data.cpu().numpy()
                    # [-1, 1] --> [0, 255]
                    im = (im + 1.0) * 127.5
                    im = im.astype(np.uint8)
                    im = np.transpose(im, (1, 2, 0))
                    im = Image.fromarray(im)
                    fullpath = '%s_g%d.png' % (save_name, k)
                    im.save(fullpath)

                # Attention map k is paired with image k + 1 when the
                # generator returned more than one image.
                for k in range(len(attention_maps)):
                    if len(fake_imgs) > 1:
                        im = fake_imgs[k + 1].detach().cpu()
                    else:
                        im = fake_imgs[0].detach().cpu()
                    attn_maps = attention_maps[k]
                    att_sze = attn_maps.size(2)
                    img_set, sentences = \
                        build_super_images2(im[j].unsqueeze(0),
                                            captions[j].unsqueeze(0),
                                            [cap_lens_np[j]], self.ixtoword,
                                            [attn_maps[j]], att_sze)
                    if img_set is not None:
                        im = Image.fromarray(img_set)
                        fullpath = '%s_a%d.png' % (save_name, k)
                        im.save(fullpath)
def sampling(self, split_dir, num_samples=30000):
    """Generate images for captions drawn from ``self.data_loader``.

    Loads the text encoder (``cfg.TRAIN.NET_E``) and the generator
    (``cfg.TRAIN.NET_G``, key ``"netG"`` of the checkpoint) and writes the
    last generator output of every caption to
    ``../output/<checkpoint name>/<split_dir>/<key>_s<running index>.png``.

    Args:
        split_dir: name of the output sub-folder; ``'test'`` is mapped to
            ``'valid'``.
        num_samples: requested number of images. ``num_samples // batch_size``
            batches are processed, but always at least one.
    """
    if cfg.TRAIN.NET_G == '':
        logger.error('Error: the path for morels is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    # Build and load the generator
    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = G_NET()
    netG.apply(weights_init)
    netG.to(cfg.DEVICE)
    netG.eval()

    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    text_encoder = text_encoder.to(cfg.DEVICE)
    text_encoder.eval()
    logger.info('Loaded text encoder from: %s', cfg.TRAIN.NET_E)

    batch_size = self.batch_size[0]
    nz = cfg.GAN.GLOBAL_Z_DIM
    noise = Variable(torch.FloatTensor(batch_size, nz)).to(cfg.DEVICE)
    local_noise = Variable(
        torch.FloatTensor(batch_size, cfg.GAN.LOCAL_Z_DIM)).to(cfg.DEVICE)

    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict["netG"])
    max_objects = 10
    logger.info('Load G from: %s', model_dir)

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')].split("/")[-1]
    save_dir = '%s/%s/%s' % ("../output", s_tmp, split_dir)
    mkdir_p(save_dir)
    logger.info("Saving images to: {}".format(save_dir))

    number_batches = max(num_samples // batch_size, 1)

    data_iter = iter(self.data_loader)
    for step in tqdm(range(number_batches)):
        # next() works on every iterator; the .next() method does not.
        data = next(data_iter)

        imgs, captions, cap_lens, class_ids, keys, transformation_matrices, \
            label_one_hot, _ = prepare_data(data, eval=True)
        transf_matrices = transformation_matrices[0]
        transf_matrices_inv = transformation_matrices[1]

        # Inference only: nothing below needs an autograd graph.
        with torch.no_grad():
            hidden = text_encoder.init_hidden(batch_size)
            # words_embs: batch_size x nef x seq_len
            # sent_emb: batch_size x nef
            words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
            mask = (captions == 0)
            num_words = words_embs.size(2)
            if mask.size(1) > num_words:
                mask = mask[:, :num_words]

            #######################################################
            # (2) Generate fake images
            ######################################################
            noise.data.normal_(0, 1)
            local_noise.data.normal_(0, 1)
            inputs = (noise, local_noise, sent_emb, words_embs, mask,
                      transf_matrices, transf_matrices_inv, label_one_hot,
                      max_objects)
            # Move every tensor input to the target device; max_objects is a
            # plain int and is passed through untouched.
            inputs = tuple(
                (inp.to(cfg.DEVICE) if isinstance(inp, torch.Tensor) else inp)
                for inp in inputs)
            fake_imgs, _, _, _ = netG(*inputs)

        for j in range(batch_size):
            s_tmp = '%s/%s' % (save_dir, keys[j])
            folder = s_tmp[:s_tmp.rfind('/')]
            if not os.path.isdir(folder):
                logger.info('Make a new folder: %s', folder)
                mkdir_p(folder)
            # Only the last generator output is saved.
            k = -1
            im = fake_imgs[k][j].data.cpu().numpy()
            # [-1, 1] --> [0, 255]
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            im = Image.fromarray(im)
            fullpath = '%s_s%d.png' % (s_tmp, step * batch_size + j)
            im.save(fullpath)
def sample(self, datapath, stage=1):
    """Generate one image per text embedding stored in ``datapath``.

    The file is read with ``torch.load`` and must provide the keys
    ``'raw_txt'`` (captions) and ``'fea_txt'`` (embeddings, concatenated
    along axis 0). Images are written as ``<n>.png`` into the fixed folder
    ``../../../Exp_label_stage2_2``, where ``n`` is the embedding index.

    Args:
        datapath: path of the saved embedding file.
        stage: 1 loads the stage-I generator, anything else the stage-II one.
    """
    if stage == 1:
        netG, _ = self.load_network_stageI()
    else:
        netG, _ = self.load_network_stageII()
    netG.eval()

    # Load text embeddings generated from the encoder
    t_file = torch.load(datapath)
    captions_list = t_file['raw_txt']
    embeddings = np.concatenate(t_file['fea_txt'], axis=0)
    num_embeddings = len(captions_list)
    print('Successfully load sentences from: ', datapath)
    print('Total number of sentences:', num_embeddings)
    print('num_embeddings:', num_embeddings, embeddings.shape)

    # path to save generated samples
    save_dir = '../../../Exp_label_stage2_2'
    mkdir_p(save_dir)

    batch_size = np.minimum(num_embeddings, self.batch_size)
    nz = cfg.Z_DIM
    noise = Variable(torch.FloatTensor(batch_size, nz))
    if cfg.CUDA:
        noise = noise.cuda()

    count = 0
    while count < num_embeddings:
        iend = count + batch_size
        if iend > num_embeddings:
            # Last, partial batch: slide the window back so that it is full
            # again; the overlapping images are simply regenerated.
            iend = num_embeddings
            count = num_embeddings - batch_size
        embeddings_batch = embeddings[count:iend]
        txt_embedding = Variable(torch.FloatTensor(embeddings_batch))
        if cfg.CUDA:
            txt_embedding = txt_embedding.cuda()

        #######################################################
        # (2) Generate fake images
        ######################################################
        noise.data.normal_(0, 1)
        inputs = (txt_embedding, noise)
        # Inference only: do not record an autograd graph.
        with torch.no_grad():
            outputs = nn.parallel.data_parallel(netG, inputs, self.gpus)
        # The stage-I generator returns one more value than the stage-II one;
        # the images are the second output in both cases.
        fake_imgs = outputs[1]

        for i in range(batch_size):
            save_name = '%s/%d.png' % (save_dir, count + i)
            im = fake_imgs[i].data.cpu().numpy()
            # [-1, 1] --> [0, 255]
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            im = Image.fromarray(im)
            im.save(save_name)
        count += batch_size
def sample(self, datapath, stage=1):
    """Generate one image per text embedding and name it after its source file.

    Embeddings are read from the torchfile ``datapath``; the file names are
    read from ``val_filename.txt`` in the same directory. Image ``n`` is
    saved as ``<cfg.NET_G without .pth>/<filename n>.jpg``.

    Args:
        datapath: path of the torchfile holding ``raw_txt`` and ``fea_txt``.
        stage: 1 loads the stage-I generator, anything else the stage-II one.
    """
    if stage == 1:
        netG, _ = self.load_network_stageI()
    else:
        netG, _ = self.load_network_stageII()
    netG.eval()

    # Load text embeddings generated from the encoder
    t_file = torchfile.load(datapath)
    data_dir = osp.dirname(datapath)
    original_filenames = np.loadtxt('%s/val_filename.txt' % data_dir,
                                    dtype=str)
    # The captions text file is no longer loaded: its contents were never
    # used by this method.
    captions_list = t_file.raw_txt
    embeddings = np.concatenate(t_file.fea_txt, axis=0)
    num_embeddings = len(captions_list)
    print('Successfully load sentences from: ', datapath)
    print('Total number of sentences:', num_embeddings)
    print('num_embeddings:', num_embeddings, embeddings.shape)

    # path to save generated samples
    save_dir = cfg.NET_G[:cfg.NET_G.find('.pth')]
    mkdir_p(save_dir)

    batch_size = np.minimum(num_embeddings, self.batch_size)
    nz = cfg.Z_DIM
    noise = Variable(torch.FloatTensor(batch_size, nz))
    if cfg.CUDA:
        noise = noise.cuda()

    count = 0
    while count < num_embeddings:
        iend = count + batch_size
        if iend > num_embeddings:
            # Last, partial batch: slide the window back so that it is full
            # again; the overlapping images are simply regenerated.
            iend = num_embeddings
            count = num_embeddings - batch_size
        embeddings_batch = embeddings[count:iend]
        filenames_batch = original_filenames[count:iend]
        txt_embedding = Variable(torch.FloatTensor(embeddings_batch))
        if cfg.CUDA:
            txt_embedding = txt_embedding.cuda()

        #######################################################
        # (2) Generate fake images
        ######################################################
        noise.data.normal_(0, 1)
        inputs = (txt_embedding, noise)
        # Inference only: do not record an autograd graph.
        with torch.no_grad():
            _, fake_imgs, _, _ = \
                nn.parallel.data_parallel(netG, inputs, self.gpus)

        for i in range(batch_size):
            save_name = osp.join(save_dir, filenames_batch[i] + '.jpg')
            im = fake_imgs[i].data.cpu().numpy()
            # [-1, 1] --> [0, 255]
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            im = Image.fromarray(im)
            im.save(save_name)
        count += batch_size
        print(count)
def sampling(self):
    """Generate images for every class of the selected test split.

    Reads the class list from ``<cfg.DATA_DIR>/<split>/class_data.pickle``
    (``split`` is ``test_unseen`` when ``self.args.split`` says so, otherwise
    ``test_seen``) and the per-class attribute vectors from the CUB
    ``class_attribute_labels_continuous.txt`` file. For each class,
    ``self.sample_num`` batches are generated and written below
    ``<args.netG>/valid/netG_epoch_600/<split>/<class name>/``:

    * ``stage_<s>/single_<n>.png`` - every image of generator output ``s``,
    * ``super/super_<n>.png`` - grids built by ``build_images`` from groups
      of 20 images of the first three generator outputs.

    File numbers continue across the ``sample_num`` batches, so later batches
    no longer overwrite earlier ones; the first batch keeps the numbering
    0 .. batch_size - 1.

    Raises:
        IOError: if the class pickle file does not exist.
    """
    if self.args.netG == '':
        print('Error: the path for models is not found!')
        return

    data_dir = cfg.DATA_DIR
    if self.args.split == "test_unseen":
        filepath = os.path.join(data_dir, "test_unseen/class_data.pickle")
    else:  # test_seen
        filepath = os.path.join(data_dir, "test_seen/class_data.pickle")
    if not os.path.isfile(filepath):
        # Fail with the real cause instead of a NameError further down.
        raise IOError('class data file not found: %s' % filepath)
    with open(filepath, "rb") as f:
        data_dic = pkl.load(f)
    class_names = data_dic['classes']
    class_ids = data_dic['class_info']

    att_dir = os.path.join(data_dir, "CUB_200_2011/attributes")
    att_np = np.zeros((312, 200))  # for CUB
    with open(att_dir + "/class_attribute_labels_continuous.txt", "r") as f:
        # One line per class; each line becomes one column of att_np.
        for ind, line in enumerate(f):
            att_np[:, ind] = [float(value) for value in line.split()]

    if self.args.kl_loss:
        netG = G_NET()
    else:
        netG = G_NET_not_CA()
    test_model = "netG_epoch_600.pth"
    model_path = os.path.join(self.args.netG, "Model", test_model)
    state_dic = torch.load(model_path,
                           map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dic)
    netG.cuda()
    netG.eval()

    noise = torch.FloatTensor(self.batch_size, cfg.GAN.Z_DIM)
    if cfg.CUDA:
        noise = noise.cuda()

    group_size = 20
    sets_per_batch = self.batch_size // group_size

    for class_name, class_id in zip(class_names, class_ids):
        print("now generating, ", class_name)
        class_dir = os.path.join(self.args.netG, 'valid',
                                 test_model[:test_model.rfind(".")],
                                 self.args.split, class_name)
        # class ids are 1-based, the attribute columns 0-based
        atts = att_np[:, class_id - 1]
        atts = np.expand_dims(atts, axis=0)
        atts = atts.repeat(self.batch_size, axis=0)
        assert atts.shape == (self.batch_size, 312)
        atts = torch.FloatTensor(atts)
        if cfg.CUDA:
            atts = atts.cuda()

        super_dir = os.path.join(class_dir, "super")

        for i in range(self.sample_num):
            noise.normal_(0, 1)
            # Inference only: do not record an autograd graph.
            with torch.no_grad():
                if self.args.kl_loss:
                    fake_imgs, _, _ = nn.parallel.data_parallel(
                        netG, (noise, atts), self.gpus)
                else:
                    fake_imgs = nn.parallel.data_parallel(
                        netG, (noise, atts), self.gpus)

            for stage in range(len(fake_imgs)):
                stage_dir = os.path.join(class_dir, "stage_%d" % stage)
                mkdir_p(stage_dir)
                for num, im in enumerate(fake_imgs[stage]):
                    # [-1, 1] --> [0, 255]; computed out of place so the
                    # generator output used for the grids stays untouched.
                    im = (im.detach().cpu() + 1) / 2 * 255
                    im = im.numpy().astype(np.uint8)
                    im = np.transpose(im, (1, 2, 0))
                    im = Image.fromarray(im)
                    img_path = os.path.join(
                        stage_dir,
                        "single_%d.png" % (i * self.batch_size + num))
                    im.save(img_path)

            mkdir_p(super_dir)
            for j in range(sets_per_batch):
                lo = j * group_size
                hi = lo + group_size
                one_set = [
                    fake_imgs[0][lo:hi],
                    fake_imgs[1][lo:hi],
                    fake_imgs[2][lo:hi],
                ]
                img_set = build_images(one_set)
                img_set = Image.fromarray(img_set)
                img_path = os.path.join(
                    super_dir, "super_%d.png" % (i * sets_per_batch + j))
                img_set.save(img_path)
def sampling(self, split_dir, num_samples=30000):
    """Generate images for captions drawn from ``self.data_loader``.

    Loads the text encoder (``cfg.TRAIN.NET_E``) and the generator
    (``cfg.TRAIN.NET_G``, key ``"netG"`` of the checkpoint) and saves the
    last generator output of every caption as
    ``<NET_G without .pth>/<split_dir>/single/<key>_s-1.png``.

    Args:
        split_dir: name of the output sub-folder; ``'test'`` is mapped to
            ``'valid'``.
        num_samples: requested number of images. Generation stops after
            ``num_samples // batch_size`` batches (at least one), or earlier
            when the data loader is exhausted.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    # Build and load the generator
    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = G_NET()
    netG.apply(weights_init)
    netG.cuda()
    netG.eval()

    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = \
        torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    batch_size = self.batch_size
    nz = cfg.GAN.Z_DIM
    noise = Variable(torch.FloatTensor(batch_size, nz))
    noise = noise.cuda()

    model_dir = cfg.TRAIN.NET_G
    state_dict = \
        torch.load(model_dir, map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict["netG"])
    print('Load G from: ', model_dir)

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')]
    save_dir = '%s/%s' % (s_tmp, split_dir)
    mkdir_p(save_dir)

    # num_samples counts images, the loop counts batches.
    number_batches = max(num_samples // batch_size, 1)

    for step, data in enumerate(self.data_loader, 0):
        if step % 10000 == 0:
            print('step: ', step)
        if step >= number_batches:
            break

        imgs, captions, cap_lens, class_ids, keys, transformation_matrices, \
            label_one_hot = prepare_data(data)
        transf_matrices_inv = transformation_matrices[1]

        # Inference only: nothing below needs an autograd graph.
        with torch.no_grad():
            hidden = text_encoder.init_hidden(batch_size)
            # words_embs: batch_size x nef x seq_len
            # sent_emb: batch_size x nef
            words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
            mask = (captions == 0)
            num_words = words_embs.size(2)
            if mask.size(1) > num_words:
                mask = mask[:, :num_words]

            #######################################################
            # (2) Generate fake images
            ######################################################
            noise.data.normal_(0, 1)
            inputs = (noise, sent_emb, words_embs, mask,
                      transf_matrices_inv, label_one_hot)
            fake_imgs, _, _, _ = \
                nn.parallel.data_parallel(netG, inputs, self.gpus)

        for j in range(batch_size):
            s_tmp = '%s/single/%s' % (save_dir, keys[j])
            folder = s_tmp[:s_tmp.rfind('/')]
            if not os.path.isdir(folder):
                print('Make a new folder: ', folder)
                mkdir_p(folder)
            # Only the last generator output is saved; k also ends up in the
            # file name.
            k = -1
            im = fake_imgs[k][j].data.cpu().numpy()
            # [-1, 1] --> [0, 255]
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            im = Image.fromarray(im)
            fullpath = '%s_s%d.png' % (s_tmp, k)
            im.save(fullpath)
def sample(self, datapath, num_samples=25, stage=1, draw_bbox=True,
           max_objects=3):
    """Save image grids for randomly chosen validation captions.

    For each of ``num_samples`` randomly drawn captions, 9 images are
    generated from different noise vectors, optionally overlaid with the
    object bounding boxes, and saved as one grid named after the caption in
    ``<cfg.NET_G without .pth>_visualize_bbox``.

    Args:
        datapath: directory prefix of the validation data; it is concatenated
            directly with ``"val_captions.t7"`` and passed to
            ``load_validation_data``.
        num_samples: number of captions to draw (with replacement).
        stage: 1 loads the stage-I generator (64 px images), anything else
            the stage-II generator (256 px images).
        draw_bbox: draw the bounding-box outlines into the grid.
        max_objects: number of object slots per caption in the label and
            bounding-box tensors.
    """
    import torchvision.utils as vutils

    num_variants = 9  # images generated per caption
    num_classes = 81  # one-hot width; index 80 stands for "no object"

    if stage == 1:
        netG, _ = self.load_network_stageI()
    else:
        netG, _ = self.load_network_stageII()
    netG.eval()

    # Load text embeddings generated from the encoder
    t_file = torchfile.load(datapath + "val_captions.t7")
    captions_list = t_file.raw_txt
    embeddings = np.concatenate(t_file.fea_txt, axis=0)
    num_embeddings = len(captions_list)

    label, bbox = load_validation_data(datapath)

    print('Successfully load sentences from: ', datapath)
    print('Total number of sentences:', num_embeddings)

    # path to save generated samples
    save_dir = cfg.NET_G[:cfg.NET_G.find('.pth')] + "_visualize_bbox"
    print("saving to:", save_dir)
    mkdir_p(save_dir)

    if cfg.CUDA:
        bbox = bbox.cuda()
        label = label.cuda()

    # Untouched copy of the boxes, used only for drawing.
    bbox_ = bbox.clone()

    transf_matrices_inv = compute_transformation_matrix_inverse(
        bbox.view(-1, 4))
    transf_matrices_inv = transf_matrices_inv.view(
        num_embeddings, max_objects, 2, 3)
    if cfg.STAGE == 2:
        # The stage-II generator takes a second set of matrices built from
        # the same boxes. A separate copy is used in case the helpers modify
        # their argument - NOTE(review): confirm and drop the clone if not.
        _bbox = bbox.clone().view(-1, 4)
        transf_matrices_inv_s2 = compute_transformation_matrix_inverse(_bbox)
        transf_matrices_inv_s2 = transf_matrices_inv_s2.view(
            num_embeddings, max_objects, 2, 3)
        transf_matrices_s2 = compute_transformation_matrix(_bbox)
        transf_matrices_s2 = transf_matrices_s2.view(
            num_embeddings, max_objects, 2, 3)

    # produce one-hot encodings of the labels
    _labels = label.long()
    # remove -1 to enable one-hot converting
    _labels[_labels < 0] = num_classes - 1
    # Allocate on the labels' device: scatter_ needs both on the same one.
    label_one_hot = torch.zeros(num_embeddings, max_objects, num_classes,
                                dtype=torch.float, device=_labels.device)
    label_one_hot = label_one_hot.scatter_(2, _labels, 1)

    nz = cfg.Z_DIM
    noise = Variable(torch.FloatTensor(num_variants, nz))
    if cfg.CUDA:
        noise = noise.cuda()

    imsize = 64 if stage == 1 else 256
    last = imsize - 1

    saved = 0
    for _ in range(num_samples):
        index = int(np.random.randint(0, num_embeddings))

        # Repeat the caption's conditioning once per noise vector.
        embeddings_batch = np.reshape(
            embeddings[index], (1, -1)).repeat(num_variants, 0)
        transf_matrices_inv_batch = transf_matrices_inv[index].view(
            1, max_objects, 2, 3).repeat(num_variants, 1, 1, 1)
        label_one_hot_batch = label_one_hot[index].view(
            1, max_objects, num_classes).repeat(num_variants, 1, 1)

        txt_embedding = Variable(torch.FloatTensor(embeddings_batch))
        if cfg.CUDA:
            txt_embedding = txt_embedding.cuda()

        #######################################################
        # (2) Generate fake images
        ######################################################
        noise.data.normal_(0, 1)
        if cfg.STAGE == 2:
            transf_matrices_s2_batch = transf_matrices_s2[index].view(
                1, max_objects, 2, 3).repeat(num_variants, 1, 1, 1)
            transf_matrices_inv_s2_batch = transf_matrices_inv_s2[index].view(
                1, max_objects, 2, 3).repeat(num_variants, 1, 1, 1)
            inputs = (txt_embedding, noise, transf_matrices_inv_batch,
                      transf_matrices_s2_batch, transf_matrices_inv_s2_batch,
                      label_one_hot_batch)
        else:
            inputs = (txt_embedding, noise, transf_matrices_inv_batch,
                      label_one_hot_batch)

        with torch.no_grad():
            _, fake_imgs, _, _, _ = netG(*inputs)

        # The grid holds exactly the generated images.
        data_img = torch.FloatTensor(
            num_variants, 3, imsize, imsize).fill_(0)
        data_img[:] = fake_imgs

        if draw_bbox:
            for obj in range(max_objects):
                x, y, w, h = [int(imsize * v) for v in bbox_[index, obj]]
                # A negative box marks the end of the real objects.
                if x <= -1:
                    break
                # Keep every drawn edge inside the image.
                x = min(x, last)
                y = min(y, last)
                w = min(w, last)
                h = min(h, last)
                right = min(x + w, last)
                bottom = min(y + h, last)
                data_img[:, :, y, x:x + w] = 1
                data_img[:, :, y:y + h, x] = 1
                data_img[:, :, bottom, x:x + w] = 1
                data_img[:, :, y:y + h, right] = 1

        caption = captions_list[index].decode('utf-8')
        print('Caption: ', caption)
        vutils.save_image(data_img,
                          '{}/{}.png'.format(save_dir, caption),
                          normalize=True, nrow=5)
        saved += 1

    print("Saved {} files to {}".format(saved, save_dir))
def gen_example(self, data_dic):
    """Generate images and attention maps for user-supplied captions.

    Loads the text encoder (``cfg.TRAIN.NET_E``) and the generator
    (``cfg.TRAIN.NET_G``). For every entry of ``data_dic`` the results are
    written to ``<NET_G without .pth>/<key>/``:

    * ``<i>_s_<sorted index>_g<k>.png`` - generated image of output k,
    * ``<i>_s_<sorted index>_a<k>.png`` - attention visualisation k (when
      ``build_super_images2`` returns one).

    Args:
        data_dic: maps a name to a ``(captions, cap_lens, sorted_indices)``
            triple; ``captions`` and ``cap_lens`` are NumPy arrays and
            ``captions.shape[0]`` is used as the batch size.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
        return

    # Build and load the text encoder
    text_encoder = \
        RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = \
        torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    # Build and load the generator
    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = G_NET()
    # the path to save generated images
    s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
    model_dir = cfg.TRAIN.NET_G
    state_dict = \
        torch.load(model_dir, map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)
    netG.cuda()
    netG.eval()

    for key in data_dic:
        save_dir = '%s/%s' % (s_tmp, key)
        mkdir_p(save_dir)
        captions, cap_lens, sorted_indices = data_dic[key]

        batch_size = captions.shape[0]
        nz = cfg.GAN.Z_DIM
        captions = Variable(torch.from_numpy(captions)).cuda()
        cap_lens = Variable(torch.from_numpy(cap_lens)).cuda()
        cap_lens_np = cap_lens.cpu().data.numpy()

        for i in range(1):  # 16
            noise = Variable(torch.FloatTensor(batch_size, nz)).cuda()

            # Inference only: one no_grad block covers the text encoder and
            # the generator.
            with torch.no_grad():
                #######################################################
                # (1) Extract text embeddings
                ######################################################
                hidden = text_encoder.init_hidden(batch_size)
                # words_embs: batch_size x nef x seq_len
                # sent_emb: batch_size x nef
                words_embs, sent_emb = text_encoder(captions, cap_lens,
                                                    hidden)
                mask = (captions == 0)
                #######################################################
                # (2) Generate fake images
                ######################################################
                noise.data.normal_(0, 1)
                fake_imgs, attention_maps, _, _ = netG(
                    noise, sent_emb, words_embs, mask)

            for j in range(batch_size):
                save_name = '%s/%d_s_%d' % (save_dir, i, sorted_indices[j])
                for k in range(len(fake_imgs)):
                    im = fake_imgs[k][j].data.cpu().numpy()
                    # [-1, 1] --> [0, 255]
                    im = (im + 1.0) * 127.5
                    im = im.astype(np.uint8)
                    im = np.transpose(im, (1, 2, 0))
                    im = Image.fromarray(im)
                    fullpath = '%s_g%d.png' % (save_name, k)
                    im.save(fullpath)

                for k in range(len(attention_maps)):
                    # Attention map k is drawn over generator output k + 1
                    # when more than one output exists.
                    if len(fake_imgs) > 1:
                        im = fake_imgs[k + 1].detach().cpu()
                    else:
                        im = fake_imgs[0].detach().cpu()
                    attn_maps = attention_maps[k]
                    att_sze = attn_maps.size(2)
                    img_set, sentences = \
                        build_super_images2(im[j].unsqueeze(0),
                                            captions[j].unsqueeze(0),
                                            [cap_lens_np[j]], self.ixtoword,
                                            [attn_maps[j]], att_sze)
                    if img_set is not None:
                        im = Image.fromarray(img_set)
                        fullpath = '%s_a%d.png' % (save_name, k)
                        im.save(fullpath)
def gen_example(self, data_dic):
    """Generate plain and noise-mixed images for user-supplied captions.

    Two noise tensors A and B are drawn per caption batch. The regular
    generator is run on each of them, and the mixing generator on the stacked
    pair in both orders. For every entry of ``data_dic`` the images are
    written to ``<NET_G without .pth>/<key>/`` as
    ``<i>_s_<sorted index>_g<k>_<tag>.png`` with ``tag`` one of ``AB``,
    ``BA`` (mixing generator) or ``A``, ``B`` (regular generator).

    Args:
        data_dic: maps a name to a ``(captions, cap_lens, sorted_indices)``
            triple; ``captions`` is a NumPy array and ``captions.shape[0]``
            is used as the batch size. ``cap_lens`` is not used here.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
        return

    def save_outputs(outputs, j, save_name, tag):
        # Write sample j of every generator output as an 8-bit PNG.
        for k in range(len(outputs)):
            im = outputs[k][j].data.cpu().numpy()
            # [-1, 1] --> [0, 255]
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            im = Image.fromarray(im)
            fullpath = '%s_g%d_%s.png' % (save_name, k, tag)
            im.save(fullpath)

    # Build and load the text encoder
    text_encoder = \
        BertEncoder(cfg.TEXT.EMBEDDING_DIM)
    state_dict = \
        torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    # Build the generators; both are loaded from the same checkpoint.
    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = G_NET()
    netG_mix = G_NET_MIX()
    # the path to save generated images
    s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
    model_dir = cfg.TRAIN.NET_G
    state_dict = \
        torch.load(model_dir, map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    netG_mix.load_state_dict(state_dict)
    print('Load G from: ', model_dir)
    netG.cuda()
    netG_mix.cuda()
    netG.eval()
    netG_mix.eval()

    for key in data_dic:
        save_dir = '%s/%s' % (s_tmp, key)
        print(save_dir)
        mkdir_p(save_dir)
        captions, _, sorted_indices = data_dic[key]

        batch_size = captions.shape[0]
        nz = cfg.GAN.Z_DIM
        captions = Variable(torch.from_numpy(captions)).cuda()

        for i in range(1):  # 16
            noise = Variable(torch.FloatTensor(2, batch_size, nz)).cuda()

            # Inference only: do not record an autograd graph.
            with torch.no_grad():
                #######################################################
                # (1) Extract text embeddings
                ######################################################
                # words_embs: batch_size x nef x seq_len
                # sent_emb: batch_size x nef
                words_embs, sent_emb = text_encoder(captions)
                mask = (captions == 0)
                #######################################################
                # (2) Generate fake images
                ######################################################
                noise.data.normal_(0, 1)
                noise1 = noise[0]
                noise2 = noise[1]
                fake_imgs_mix1, _, _, _ = netG_mix(
                    noise, sent_emb, words_embs, mask)
                # Same pair with the two halves swapped.
                swapped = torch.cat(
                    [noise.chunk(2, 0)[1], noise.chunk(2, 0)[0]], dim=0)
                fake_imgs_mix2, _, _, _ = netG_mix(
                    swapped, sent_emb, words_embs, mask)
                fake_imgs1, _, _, _ = netG(
                    noise1, sent_emb, words_embs, mask)
                fake_imgs2, _, _, _ = netG(
                    noise2, sent_emb, words_embs, mask)

            for j in range(batch_size):
                save_name = '%s/%d_s_%d' % (save_dir, i, sorted_indices[j])
                save_outputs(fake_imgs_mix1, j, save_name, 'AB')
                save_outputs(fake_imgs_mix2, j, save_name, 'BA')
                save_outputs(fake_imgs1, j, save_name, 'A')
                save_outputs(fake_imgs2, j, save_name, 'B')