def load_network(gpus):
    """Build the image encoder, generator, discriminators and Inception model.

    Every trainable network is initialised with ``weights_init`` and wrapped
    in ``torch.nn.DataParallel``. Checkpoints named in ``cfg.TRAIN`` are
    loaded when the corresponding setting is not an empty string.

    Args:
        gpus: device ids forwarded to ``torch.nn.DataParallel``.

    Returns:
        Tuple ``(netG, netsD, netEn_img, inception_model, num_Ds, count)``.
        ``count`` is 0 unless a generator checkpoint was loaded, in which
        case it is the integer found between the last ``_`` and the last
        ``.`` of ``cfg.TRAIN.NET_G``, plus one.
    """
    netEn_img = MLP_ENCODER_IMG()
    netEn_img.apply(weights_init)
    netEn_img = torch.nn.DataParallel(netEn_img, device_ids=gpus)
    print(netEn_img)

    netG = G_NET()
    netG.apply(weights_init)
    netG = torch.nn.DataParallel(netG, device_ids=gpus)
    print(netG)

    # One discriminator per enabled branch of the tree.
    netsD = []
    if cfg.TREE.BRANCH_NUM > 0:
        netsD.append(D_NET64())
    if cfg.TREE.BRANCH_NUM > 1:
        netsD.append(D_NET128())
    if cfg.TREE.BRANCH_NUM > 2:
        netsD.append(D_NET256())

    # ``range``/``enumerate`` instead of ``xrange`` so this runs on Python 3.
    for i, net_d in enumerate(netsD):
        net_d.apply(weights_init)
        netsD[i] = torch.nn.DataParallel(net_d, device_ids=gpus)
    print('# of netsD', len(netsD))

    count = 0
    if cfg.TRAIN.NET_G != '':
        state_dict = torch.load(cfg.TRAIN.NET_G)
        netG.load_state_dict(state_dict)
        print('Load ', cfg.TRAIN.NET_G)

        # Resume the counter from the number embedded in the file name.
        istart = cfg.TRAIN.NET_G.rfind('_') + 1
        iend = cfg.TRAIN.NET_G.rfind('.')
        count = int(cfg.TRAIN.NET_G[istart:iend]) + 1

    if cfg.TRAIN.NET_D != '':
        for i, net_d in enumerate(netsD):
            # Log exactly the path that is opened (the old message printed
            # an extra underscore that is not part of the file name).
            d_path = '%s%d.pth' % (cfg.TRAIN.NET_D, i)
            print('Load %s' % d_path)
            net_d.load_state_dict(torch.load(d_path))

    if cfg.TRAIN.NET_MLP_IMG != '':
        state_dict = torch.load(cfg.TRAIN.NET_MLP_IMG)
        netEn_img.load_state_dict(state_dict)
        print('Load ', cfg.TRAIN.NET_MLP_IMG)

    inception_model = INCEPTION_V3()

    if cfg.CUDA:
        netG.cuda()
        netEn_img = netEn_img.cuda()
        for net_d in netsD:
            net_d.cuda()
        inception_model = inception_model.cuda()
    inception_model.eval()

    return netG, netsD, netEn_img, inception_model, len(netsD), count
def build_generator(self, net_G):
    """Return a ``G_NET`` in eval mode with weights read from ``net_G``.

    Args:
        net_G: path of the saved generator state dict.
    """
    generator = G_NET()
    # The identity map_location leaves storages where deserialisation put them.
    weights = torch.load(net_G, map_location=lambda storage, loc: storage)
    generator.load_state_dict(weights)
    generator.eval()
    return generator
def evaluate_finegan(self):
    """Run the generator once for the configured class codes and save each stage.

    Reads the checkpoint at ``cfg.TRAIN.NET_G`` (an empty path only prints an
    error), feeds one noise batch plus one-hot background / parent / child
    codes through the generator, and writes the first sample of every
    returned image group through ``self.save_image``.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for model not found!')
        return

    generator = G_NET()
    generator.apply(weights_init)
    generator = torch.nn.DataParallel(generator, device_ids=self.gpus)

    # Only checkpoint entries whose key exists in the fresh model are taken;
    # everything else keeps its initial value.
    merged_weights = generator.state_dict()
    checkpoint = torch.load(cfg.TRAIN.NET_G,
                            map_location=lambda storage, loc: storage)
    matching = {key: value for key, value in checkpoint.items()
                if key in merged_weights}
    merged_weights.update(matching)
    generator.load_state_dict(merged_weights)
    print('Load ', cfg.TRAIN.NET_G)

    noise = torch.FloatTensor(self.batch_size, cfg.GAN.Z_DIM)
    noise.data.normal_(0, 1)

    if cfg.CUDA:
        generator.cuda()
        noise = noise.cuda()
    generator.eval()

    bg_index = cfg.TEST_BACKGROUND_CLASS
    parent_index = cfg.TEST_PARENT_CLASS
    child_index = cfg.TEST_CHILD_CLASS

    bg_code = torch.zeros([self.batch_size, cfg.FINE_GRAINED_CATEGORIES])
    p_code = torch.zeros([self.batch_size, cfg.SUPER_CATEGORIES])
    c_code = torch.zeros([self.batch_size, cfg.FINE_GRAINED_CATEGORIES])

    # Every row of a code gets the same single hot entry.
    for row in range(self.batch_size):
        bg_code[row][bg_index] = 1
        p_code[row][parent_index] = 1
        c_code[row][child_index] = 1

    fake_imgs, fg_imgs, mk_imgs, fgmk_imgs = generator(
        noise, c_code, p_code, bg_code)

    # (image group, stage index, file label), in the order they are written.
    save_plan = (
        (fake_imgs, 0, 'background'),
        (fake_imgs, 1, 'parent_final'),
        (fake_imgs, 2, 'child_final'),
        (fg_imgs, 0, 'parent_foreground'),
        (fg_imgs, 1, 'child_foreground'),
        (mk_imgs, 0, 'parent_mask'),
        (mk_imgs, 1, 'child_mask'),
        (fgmk_imgs, 0, 'parent_foreground_masked'),
        (fgmk_imgs, 1, 'child_foreground_masked'),
    )
    for group, stage, label in save_plan:
        self.save_image(group[stage][0], self.save_dir, label)
def evaluate(self, split_dir):
    """Sample images from the checkpoint at ``cfg.TRAIN.NET_G`` and save them.

    Images go to ``<checkpoint dir>/iteration<N>/<split_dir>/super`` or
    ``.../single`` depending on ``cfg.TEST.B_EXAMPLE``, where ``N`` is the
    integer between the last ``_`` and the last ``.`` of the checkpoint path.

    Args:
        split_dir: sub-directory name appended to the output path.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
        return

    # Build and load the generator.
    generator = G_NET()
    generator.apply(weights_init)
    generator = torch.nn.DataParallel(generator, device_ids=self.gpus)
    print(generator)
    weights = torch.load(cfg.TRAIN.NET_G,
                         map_location=lambda storage, loc: storage)
    generator.load_state_dict(weights)
    print('Load ', cfg.TRAIN.NET_G)

    # Derive the output folder from the checkpoint path.
    ckpt_path = cfg.TRAIN.NET_G
    number_start = ckpt_path.rfind('_') + 1
    number_end = ckpt_path.rfind('.')
    iteration = int(ckpt_path[number_start:number_end])
    ckpt_dir = ckpt_path[:ckpt_path.rfind('/')]
    save_dir = '%s/iteration%d/%s' % (ckpt_dir, iteration, split_dir)
    folder = ('%s/super' % (save_dir) if cfg.TEST.B_EXAMPLE
              else '%s/single' % (save_dir))
    print('Make a new folder: ', folder)
    mkdir_p(folder)

    noise = Variable(torch.FloatTensor(self.batch_size, cfg.GAN.Z_DIM))
    if cfg.CUDA:
        generator.cuda()
        noise = noise.cuda()

    # Switch to evaluate mode.
    generator.eval()

    total_batches = int(cfg.TEST.SAMPLE_NUM / self.batch_size)
    saved = 0
    for _batch in range(total_batches):
        noise.data.normal_(0, 1)
        fake_imgs, layers_output, _, _ = generator(noise)
        if len(layers_output) != len(lamdas):
            print("please check lamdas length")

        # Only the last element of fake_imgs is written out.
        if cfg.TEST.B_EXAMPLE:
            self.save_superimages(fake_imgs[-1], folder, saved, 256)
        else:
            self.save_singleimages(fake_imgs[-1], folder, saved, 256)
        saved += self.batch_size
def load_network(gpus):
    """Create the generator and the per-stage discriminators.

    Three discriminators (indices 0..2) are added for the 128x128 branch when
    ``cfg.TREE.BRANCH_NUM > 1`` and three more for the 256x256 branch when it
    is ``> 2``. All networks are initialised with ``weights_init``, wrapped
    in ``torch.nn.DataParallel`` and optionally restored from ``cfg.TRAIN``
    checkpoints.

    Args:
        gpus: device ids forwarded to ``torch.nn.DataParallel``.

    Returns:
        Tuple ``(netG, netsD, num_Ds, count)``; ``count`` is 0 unless a
        generator checkpoint was loaded, in which case it is the number
        parsed from its file name plus one.
    """
    generator = G_NET()
    generator.apply(weights_init)
    generator = torch.nn.DataParallel(generator, device_ids=gpus)
    print(generator)

    discriminators = []
    # 128 * 128: background, parent and child stage.
    if cfg.TREE.BRANCH_NUM > 1:
        for stage in range(3):
            discriminators.append(D_NET128(stage))
    # 256 * 256: background, parent and child stage.
    if cfg.TREE.BRANCH_NUM > 2:
        for stage in range(3):
            discriminators.append(D_NET256(stage))

    for position, net in enumerate(discriminators):
        net.apply(weights_init)
        discriminators[position] = torch.nn.DataParallel(net, device_ids=gpus)
        print(discriminators[position])

    count = 0
    if cfg.TRAIN.NET_G != '':
        generator.load_state_dict(torch.load(cfg.TRAIN.NET_G))
        print('Load ', cfg.TRAIN.NET_G)

        number_start = cfg.TRAIN.NET_G.rfind('_') + 1
        number_end = cfg.TRAIN.NET_G.rfind('.')
        count = int(cfg.TRAIN.NET_G[number_start:number_end]) + 1

    if cfg.TRAIN.NET_D != '':
        for position, net in enumerate(discriminators):
            print('Load %s_%d.pth' % (cfg.TRAIN.NET_D, position))
            weights = torch.load('%s_%d.pth' % (cfg.TRAIN.NET_D, position))
            net.load_state_dict(weights)

    if cfg.CUDA:
        generator.cuda()
        for net in discriminators:
            net.cuda()

    return generator, discriminators, len(discriminators), count
def build_models(self):
    """Create the generator and the instance / global discriminators.

    All three networks are sized by ``len(self.cats_index_dict)``. When
    ``cfg.TRAIN.NET_G`` is set, the generator is restored from it and both
    discriminators are restored from ``netINSD.pth`` / ``netGLBD.pth`` in the
    same directory.

    Returns:
        ``[netG, netINSD, netGLBD, epoch]``; ``epoch`` is 0 for a fresh run,
        otherwise the number parsed from the generator file name plus one.
    """
    def _load_on_cpu(path):
        return torch.load(path, map_location=lambda storage, loc: storage)

    num_categories = len(self.cats_index_dict)
    netG = G_NET(num_categories)
    netINSD = INS_D_NET(num_categories)
    netGLBD = GLB_D_NET(num_categories)

    for net in (netG, netINSD, netGLBD):
        net.apply(weights_init)

    if cfg.CUDA:
        for net in (netG, netINSD, netGLBD):
            net.cuda()

    if len(cfg.GPU_IDS) > 1:
        netG = nn.DataParallel(netG)
        netG.to(self.device)
        netINSD = nn.DataParallel(netINSD)
        netINSD.to(self.device)
        netGLBD = nn.DataParallel(netGLBD)
        netGLBD.to(self.device)

    epoch = 0
    if cfg.TRAIN.NET_G != '':
        netG.load_state_dict(_load_on_cpu(cfg.TRAIN.NET_G))
        print('Load G from: ', cfg.TRAIN.NET_G)

        # Resume from the epoch number embedded in the file name.
        filename = path_leaf(cfg.TRAIN.NET_G)
        number_start = filename.rfind('_') + 1
        number_end = filename.rfind('.')
        epoch = int(filename[number_start:number_end]) + 1

        # Both discriminator files are looked up next to the generator file.
        g_path = cfg.TRAIN.NET_G
        ckpt_dir = g_path[:g_path.rfind('/')]

        insd_path = '%s/netINSD.pth' % (ckpt_dir)
        print('Load INSD from: ', insd_path)
        netINSD.load_state_dict(_load_on_cpu(insd_path))

        glbd_path = '%s/netGLBD.pth' % (ckpt_dir)
        print('Load GLBD from: ', glbd_path)
        netGLBD.load_state_dict(_load_on_cpu(glbd_path))

    return [netG, netINSD, netGLBD, epoch]
def load_checkpoint(modelpath):
    """Load a ``DataParallel``-wrapped generator in eval mode.

    The GPU ids come from the comma-separated ``cfg.GPU_ID`` string; the first
    one becomes the current CUDA device.

    Args:
        modelpath: path of the saved generator state dict.

    Returns:
        The ``torch.nn.DataParallel`` generator with the weights loaded.
    """
    gpu_ids = [int(token) for token in cfg.GPU_ID.split(',')]
    torch.cuda.set_device(gpu_ids[0])

    weights = torch.load(modelpath,
                         map_location=lambda storage, loc: storage)

    generator = G_NET()
    generator.apply(weights_init)
    generator = torch.nn.DataParallel(generator, device_ids=gpu_ids)
    generator.load_state_dict(weights)
    generator.eval()
    return generator
def load_network(gpus):
    """Create the generator and four discriminators for ``start_depth``.

    The discriminators are, in order: ``D_NET_BG``, ``D_NET_PC(1, ...)``,
    ``D_NET_PC(2, ...)`` and ``D_NET_BG_PG``. Every network is initialised
    with ``weights_init`` and wrapped in ``torch.nn.DataParallel``.

    Discriminator checkpoints are named ``<NET_D><i>_<depth tag>.pth``, where
    the depth tag (``depth...``) is taken from the generator checkpoint name.

    Args:
        gpus: device ids forwarded to ``torch.nn.DataParallel``.

    Returns:
        Tuple ``(netG, netsD, num_Ds, count)``; ``count`` is the number found
        between ``netG_`` and ``_depth`` in ``cfg.TRAIN.NET_G`` (0 when no
        generator checkpoint is configured).

    Raises:
        ValueError: if ``cfg.TRAIN.NET_D`` is set but ``cfg.TRAIN.NET_G`` is
            empty, because the depth tag cannot be determined.
    """
    netG = G_NET(start_depth)
    netG.apply(weights_init)
    netG = torch.nn.DataParallel(netG, device_ids=gpus)
    print(netG)

    netsD = [
        D_NET_BG(start_depth),
        D_NET_PC(1, start_depth),
        D_NET_PC(2, start_depth),
        D_NET_BG_PG(start_depth),
    ]
    for i, net_d in enumerate(netsD):
        net_d.apply(weights_init)
        netsD[i] = torch.nn.DataParallel(net_d, device_ids=gpus)
        print(netsD[i])

    count = 0
    depth_tag = None
    if cfg.TRAIN.NET_G != '':
        state_dict = torch.load(cfg.TRAIN.NET_G)
        netG.load_state_dict(state_dict)
        print('Load ', cfg.TRAIN.NET_G)

        # '<...>netG_<count>_depth<d>.pth' -> count
        istart = cfg.TRAIN.NET_G.rfind('netG_') + 5
        iend = cfg.TRAIN.NET_G.rfind('_depth')
        count = int(cfg.TRAIN.NET_G[istart:iend])

        # '<...>_depth<d>.pth' -> 'depth<d>'
        istart = cfg.TRAIN.NET_G.rfind('depth')
        iend = cfg.TRAIN.NET_G.rfind('.')
        depth_tag = cfg.TRAIN.NET_G[istart:iend]

    if cfg.TRAIN.NET_D != '':
        if depth_tag is None:
            # Previously this path died with a NameError on an unbound local.
            raise ValueError(
                'cfg.TRAIN.NET_D is set but cfg.TRAIN.NET_G is empty; the '
                'depth tag of the discriminator checkpoints is parsed from '
                'the generator checkpoint name')
        for i, net_d in enumerate(netsD):
            d_path = '%s%d_%s.pth' % (cfg.TRAIN.NET_D, i, depth_tag)
            print('Load %s' % d_path)
            net_d.load_state_dict(torch.load(d_path))

    if cfg.CUDA:
        netG.cuda()
        for net_d in netsD:
            net_d.cuda()

    return netG, netsD, len(netsD), count
def models(word_len):
    """Return the text encoder and generator, building them when not cached.

    Both models are looked up in ``cache`` first. A missing model is built,
    loaded from its hard-coded COCO checkpoint, moved to the GPU when
    ``cfg.CUDA`` is set and switched to eval mode.

    NOTE: nothing is written back to ``cache`` here (the ``cache.set`` calls
    are disabled), so a build happens on every call unless another code path
    fills the cache.

    Args:
        word_len: vocabulary size passed to ``RNN_ENCODER``.

    Returns:
        Tuple ``(text_encoder, netG)``.
    """
    text_encoder = cache.get('text_encoder')
    if text_encoder is None:
        text_encoder = RNN_ENCODER(word_len, nhidden=256)
        state_dict = torch.load('../DAMSMencoders/coco/text_encoder100.pth',
                                map_location=lambda storage, loc: storage)
        text_encoder.load_state_dict(state_dict)
        # Respect cfg.CUDA like the generator below; the unconditional
        # .cuda() call failed on CPU-only hosts and could leave the two
        # models on different devices.
        if cfg.CUDA:
            text_encoder.cuda()
        text_encoder.eval()

    netG = cache.get('netG')
    if netG is None:
        netG = G_NET()
        state_dict = torch.load('../models/coco_AttnGAN2.pth',
                                map_location=lambda storage, loc: storage)
        netG.load_state_dict(state_dict)
        if cfg.CUDA:
            netG.cuda()
        netG.eval()

    return text_encoder, netG
def load_checkpoint(modelpath):
    """Load a generator checkpoint onto the CPU as a plain ``G_NET``.

    Keys that start with ``module.`` have that prefix removed before they are
    handed to ``G_NET.load_state_dict``. Keys without the prefix are kept
    unchanged, so checkpoints saved with or without the prefix both load.

    Args:
        modelpath: path of the saved generator state dict.

    Returns:
        The ``G_NET`` instance in eval mode.
    """
    prefix = 'module.'
    state_dict = torch.load(modelpath, map_location='cpu')

    # Strip the prefix only where it is present; slicing every key by a
    # fixed 7 characters mangles keys that do not carry it.
    new_state_dict = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }

    netG = G_NET()
    netG.load_state_dict(new_state_dict)
    netG.eval()
    return netG
def models(word_len):
    """Return the cached text encoder and generator, building them on a miss.

    The text encoder type is selected by ``sys.argv[1]`` (case-insensitive):
    ``rnn`` builds ``RNN_ENCODER``, ``transformer`` builds a ``GPT2Model``
    from ``TRANSFORMER_ENCODER``. The generator is ``G_NET_STYLED`` when
    ``cfg.GAN.B_STYLEGEN`` is set, otherwise ``G_NET``. Newly built models
    are stored in ``cache`` for 24 hours.

    Args:
        word_len: vocabulary size passed to ``RNN_ENCODER``.

    Returns:
        Tuple ``(text_encoder, netG)``.

    Raises:
        ValueError: if the encoder must be built and ``sys.argv[1]`` is
            neither ``rnn`` nor ``transformer``.
    """
    print( word_len )
    text_encoder = cache.get('text_encoder')
    if text_encoder is None:
        print( "text_encoder not cached" )
        encoder_kind = sys.argv[1].casefold()
        if encoder_kind == 'rnn':
            text_encoder = RNN_ENCODER(word_len, nhidden=cfg.TEXT.EMBEDDING_DIM)
        elif encoder_kind == 'transformer':
            text_encoder = GPT2Model.from_pretrained( TRANSFORMER_ENCODER )
        else:
            # Without this branch text_encoder stayed None and the failure
            # surfaced later as an AttributeError on load_state_dict.
            raise ValueError(
                "unknown text encoder type %r; expected 'rnn' or "
                "'transformer'" % sys.argv[1])
        state_dict = torch.load(cfg.TRAIN.NET_E,
                                map_location=lambda storage, loc: storage)
        text_encoder.load_state_dict(state_dict)
        if cfg.CUDA:
            text_encoder.cuda()
        text_encoder.eval()
        cache.set('text_encoder', text_encoder, timeout=60 * 60 * 24)

    netG = cache.get('netG')
    if netG is None:
        print( "netG not cached" )
        netG = G_NET_STYLED() if cfg.GAN.B_STYLEGEN else G_NET()
        checkpoint = torch.load(cfg.TRAIN.NET_G,
                                map_location=lambda storage, loc: storage)
        if cfg.GAN.B_STYLEGEN:
            # The styled checkpoint is a dict holding both the weights and
            # the w_ewma value, which is attached to the model directly.
            netG.w_ewma = checkpoint[ 'w_ewma' ]
            if cfg.CUDA:
                netG.w_ewma = netG.w_ewma.to( 'cuda:' + str( cfg.GPU_ID ) )
            netG.load_state_dict( checkpoint[ 'netG_state_dict' ] )
        else:
            netG.load_state_dict( checkpoint )
        if cfg.CUDA:
            netG.cuda()
        netG.eval()
        cache.set('netG', netG, timeout=60 * 60 * 24)

    return text_encoder, netG
def models(word_len):
    """Fetch the text encoder and generator from ``cache``, building on a miss.

    A model that is not cached is constructed, loaded from the checkpoint
    named in ``cfg.TRAIN`` (``NET_E`` for the encoder, ``NET_G`` for the
    generator), moved to the GPU when ``cfg.CUDA`` is set, put into eval mode
    and cached for 24 hours.

    Args:
        word_len: vocabulary size passed to ``RNN_ENCODER``.

    Returns:
        Tuple ``(text_encoder, netG)``.
    """
    one_day = 60 * 60 * 24

    def _load_on_cpu(path):
        return torch.load(path, map_location=lambda storage, loc: storage)

    encoder = cache.get('text_encoder')
    if encoder is None:
        encoder = RNN_ENCODER(word_len, nhidden=cfg.TEXT.EMBEDDING_DIM)
        encoder.load_state_dict(_load_on_cpu(cfg.TRAIN.NET_E))
        if cfg.CUDA:
            encoder.cuda()
        encoder.eval()
        cache.set('text_encoder', encoder, timeout=one_day)

    generator = cache.get('netG')
    if generator is None:
        generator = G_NET()
        generator.load_state_dict(_load_on_cpu(cfg.TRAIN.NET_G))
        if cfg.CUDA:
            generator.cuda()
        generator.eval()
        cache.set('netG', generator, timeout=one_day)

    return encoder, generator
def build_models(self):
    """Create the encoders, style-loss net, generator, target generator and Ds.

    The VGG style-loss network and both pretrained encoders are frozen and
    put into eval mode. The generator/discriminator family depends on
    ``cfg.GAN.B_DCGAN`` and ``cfg.TREE.BRANCH_NUM``. A deep copy of the
    generator is kept as a frozen target network; with ``cfg.CUDA`` it is
    placed on ``secondary_device`` while everything else goes to the default
    CUDA device.

    Returns:
        ``[text_encoder, image_encoder, netG, target_netG, netsD, epoch,
        style_loss]``, or ``None`` when ``cfg.TRAIN.NET_E`` is empty.
    """
    if cfg.TRAIN.NET_E == '':
        print('Error: no pretrained text-image encoders')
        return

    def _load_on_cpu(path):
        return torch.load(path, map_location=lambda storage, loc: storage)

    def _freeze(module):
        for param in module.parameters():
            param.requires_grad = False

    # VGG16 network used for the style loss.
    style_loss = VGGNet()
    _freeze(style_loss)
    print("Load the style loss model")
    style_loss.eval()

    # The image encoder checkpoint sits next to the text encoder checkpoint.
    image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
    img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder',
                                               'image_encoder')
    image_encoder.load_state_dict(_load_on_cpu(img_encoder_path))
    _freeze(image_encoder)
    print('Load image encoder from:', img_encoder_path)
    image_encoder.eval()

    text_encoder = RNN_ENCODER(self.n_words,
                               nhidden=cfg.TEXT.EMBEDDING_DIM)
    text_encoder.load_state_dict(_load_on_cpu(cfg.TRAIN.NET_E))
    _freeze(text_encoder)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder.eval()

    netsD = []
    if cfg.GAN.B_DCGAN:
        # A single discriminator whose resolution follows the branch count.
        if cfg.TREE.BRANCH_NUM == 1:
            from model import D_NET64 as D_NET
        elif cfg.TREE.BRANCH_NUM == 2:
            from model import D_NET128 as D_NET
        else:
            from model import D_NET256 as D_NET
        netG = G_DCGAN()
        netsD = [D_NET(b_jcu=False)]
    else:
        from model import D_NET64, D_NET128, D_NET256
        netG = G_NET()
        if cfg.TREE.BRANCH_NUM > 0:
            netsD.append(D_NET64())
        if cfg.TREE.BRANCH_NUM > 1:
            netsD.append(D_NET128())
        if cfg.TREE.BRANCH_NUM > 2:
            netsD.append(D_NET256())

    netG.apply(weights_init)
    for net_d in netsD:
        net_d.apply(weights_init)
    print('# of netsD', len(netsD))

    epoch = 0
    if cfg.TRAIN.NET_G != '':
        netG.load_state_dict(_load_on_cpu(cfg.TRAIN.NET_G))
        print('Load G from: ', cfg.TRAIN.NET_G)

        number_start = cfg.TRAIN.NET_G.rfind('_') + 1
        number_end = cfg.TRAIN.NET_G.rfind('.')
        epoch = int(cfg.TRAIN.NET_G[number_start:number_end]) + 1

        if cfg.TRAIN.B_NET_D:
            g_path = cfg.TRAIN.NET_G
            for position, net_d in enumerate(netsD):
                ckpt_dir = g_path[:g_path.rfind('/')]
                d_path = '%s/netD%d.pth' % (ckpt_dir, position)
                print('Load D from: ', d_path)
                net_d.load_state_dict(_load_on_cpu(d_path))

    # The target network starts as an exact copy of the generator.
    target_netG = deepcopy(netG)

    if cfg.CUDA:
        text_encoder = text_encoder.cuda()
        image_encoder = image_encoder.cuda()
        style_loss = style_loss.cuda()

        # The target network is stored on the secondary GPU.
        target_netG.cuda(secondary_device)
        target_netG.ca_net.device = secondary_device

        netG.cuda()
        for position, net_d in enumerate(netsD):
            netsD[position] = net_d.cuda()

    # The target network is never trained directly.
    _freeze(target_netG)

    return [
        text_encoder, image_encoder, netG, target_netG, netsD, epoch,
        style_loss
    ]
def sampling(self, split_dir):
    """Generate images for paired captions and report the average DDVA.

    For every batch of ``self.data_loader`` two sets of images are generated:
    one from the regular captions and one from the "imperfect" captions. The
    final image of each regular sample is saved as a PNG under
    ``<NET_G without .pth>/<split_dir>``, and ``negative_ddva`` is accumulated
    over the batches. The average (with the sign flipped) is printed at the
    end.

    Args:
        split_dir: name of the data split; ``'test'`` is mapped to ``'valid'``.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for models is not found!')
    else:
        if split_dir == 'test':
            split_dir = 'valid'
        if cfg.GAN.B_DCGAN:
            netG = G_DCGAN()
        else:
            netG = G_NET()
        netG.apply(weights_init)
        # NOTE(review): unlike other entry points this moves everything to
        # CUDA unconditionally (no cfg.CUDA check).
        netG.cuda()
        netG.eval()
        #
        text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
        state_dict = \
            torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
        text_encoder.load_state_dict(state_dict)
        print('Load text encoder from:', cfg.TRAIN.NET_E)
        text_encoder = text_encoder.cuda()
        text_encoder.eval()

        batch_size = self.batch_size
        nz = cfg.GAN.Z_DIM
        # NOTE(review): `volatile` is presumably a leftover from pre-0.4
        # PyTorch; confirm whether torch.no_grad() is wanted instead.
        noise = Variable(torch.FloatTensor(batch_size, nz), volatile=True)
        noise = noise.cuda()

        # The generator weights are loaded only now, after netG was already
        # moved to the GPU and put into eval mode.
        model_dir = cfg.TRAIN.NET_G
        state_dict = \
            torch.load(model_dir, map_location=lambda storage, loc: storage)
        netG.load_state_dict(state_dict)
        print('Load G from: ', model_dir)

        # the path to save generated images
        s_tmp = model_dir[:model_dir.rfind('.pth')]
        save_dir = '%s/%s' % (s_tmp, split_dir)
        mkdir_p(save_dir)

        cnt = 0  # number of samples seen so far (not read afterwards)
        idx = 0  # running index used in the output file names
        ###
        avg_ddva = 0  # running sum of (-negative_ddva) over batches
        for _ in range(1):
            for step, data in enumerate(self.data_loader, 0):
                cnt += batch_size
                if step % 100 == 0:
                    print('step: ', step)

                captions, cap_lens, imperfect_captions, imperfect_cap_lens, misc = data

                # Generate images for human-text ----------------------------------------------------------------
                data_human = [captions, cap_lens, misc]

                imgs, captions, cap_lens, class_ids, keys, wrong_caps,\
                    wrong_caps_len, wrong_cls_id= prepare_data(data_human)

                hidden = text_encoder.init_hidden(batch_size)
                words_embs, sent_emb = text_encoder(
                    captions, cap_lens, hidden)
                words_embs, sent_emb = words_embs.detach(
                ), sent_emb.detach()
                # Token id 0 is masked out; the mask is cut to the number of
                # word embeddings the encoder returned.
                mask = (captions == 0)
                num_words = words_embs.size(2)
                if mask.size(1) > num_words:
                    mask = mask[:, :num_words]

                noise.data.normal_(0, 1)
                fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs, mask)

                # Generate images for imperfect caption-text-------------------------------------------------------
                data_imperfect = [
                    imperfect_captions, imperfect_cap_lens, misc
                ]

                imgs, imperfect_captions, imperfect_cap_lens, class_ids, imperfect_keys, wrong_caps,\
                    wrong_caps_len, wrong_cls_id = prepare_data(data_imperfect)

                hidden = text_encoder.init_hidden(batch_size)
                words_embs, sent_emb = text_encoder(
                    imperfect_captions, imperfect_cap_lens, hidden)
                words_embs, sent_emb = words_embs.detach(
                ), sent_emb.detach()
                mask = (imperfect_captions == 0)
                num_words = words_embs.size(2)
                if mask.size(1) > num_words:
                    mask = mask[:, :num_words]

                # Fresh noise is drawn for the second generation pass.
                noise.data.normal_(0, 1)
                imperfect_fake_imgs, _, _, _ = netG(
                    noise, sent_emb, words_embs, mask)

                # Sort the results by keys to align ----------------------------------------------------------------
                keys, captions, cap_lens, fake_imgs, _, _ = sort_by_keys(
                    keys, captions, cap_lens, fake_imgs, None, None)

                imperfect_keys, imperfect_captions, imperfect_cap_lens, imperfect_fake_imgs, true_imgs, _ = \
                    sort_by_keys(imperfect_keys, imperfect_captions, imperfect_cap_lens, imperfect_fake_imgs,\
                                 imgs, None)

                # Shift device for the imgs, target_imgs and imperfect_imgs------------------------------------------------
                for i in range(len(imgs)):
                    imgs[i] = imgs[i].to(secondary_device)
                    imperfect_fake_imgs[i] = imperfect_fake_imgs[i].to(
                        secondary_device)
                    fake_imgs[i] = fake_imgs[i].to(secondary_device)

                for j in range(batch_size):
                    # Every image is written as '<save_dir>/single_s<idx>.png'.
                    s_tmp = '%s/single' % (save_dir)
                    folder = s_tmp[:s_tmp.rfind('/')]
                    if not os.path.isdir(folder):
                        print('Make a new folder: ', folder)
                        mkdir_p(folder)
                    # Only the last entry of each image list is used.
                    k = -1
                    # (x + 1) * 127.5, cast to uint8, channels moved last.
                    im = fake_imgs[k][j].data.cpu().numpy()
                    im = (im + 1.0) * 127.5
                    im = im.astype(np.uint8)
                    im = np.transpose(im, (1, 2, 0))

                    # Converted but not saved (saving is commented out below).
                    cap_im = imperfect_fake_imgs[k][j].data.cpu().numpy()
                    cap_im = (cap_im + 1.0) * 127.5
                    cap_im = cap_im.astype(np.uint8)
                    cap_im = np.transpose(cap_im, (1, 2, 0))

                    # True image, converted the same way; also not saved.
                    true_im = true_imgs[k][j].data.cpu().numpy()
                    true_im = (true_im + 1.0) * 127.5
                    true_im = true_im.astype(np.uint8)
                    true_im = np.transpose(true_im, (1, 2, 0))

                    # Uncomment to save images.
                    #true_im = Image.fromarray(true_im)
                    #fullpath = '%s_true_s%d.png' % (s_tmp, idx)
                    #true_im.save(fullpath)
                    im = Image.fromarray(im)
                    fullpath = '%s_s%d.png' % (s_tmp, idx)
                    im.save(fullpath)
                    #cap_im = Image.fromarray(cap_im)
                    #fullpath = '%s_imperfect_s%d.png' % (s_tmp, idx)
                    idx = idx + 1
                    #cap_im.save(fullpath)

                neg_ddva = negative_ddva(
                    imperfect_fake_imgs,
                    imgs,
                    fake_imgs,
                    reduce='mean',
                    final_only=True).data.cpu().numpy()
                # negative_ddva is negated before it is accumulated.
                avg_ddva += neg_ddva * (-1)

                #text_caps = [[self.ixtoword[word] for word in sent if word!=0] for sent in captions.tolist()]
                #imperfect_text_caps = [[self.ixtoword[word] for word in sent if word!=0] for sent in
                #                       imperfect_captions.tolist()]
                print(step)
        # NOTE(review): `step` is unbound here if the data loader yields no
        # batches.
        avg_ddva = avg_ddva / (step + 1)
        print('\n\nAvg_DDVA: ', avg_ddva)
def embedding(self, split_dir, model):
    """Generate images and dump per-key image / text feature dictionaries.

    For every batch of ``self.data_loader`` the generator produces images
    from the caption embeddings. The image feature of each generated sample
    and the sentence embedding used for it are stored under the cleaned-up
    sample key and pickled to ``<NET_G without .pth>.pkl`` and
    ``<NET_G without .pth>_text.pkl``.

    Args:
        split_dir: name of the data split; ``'test'`` is mapped to ``'valid'``
            (the value is not used afterwards in this function).
        model: object providing ``encode_text`` / ``encode_image``; used when
            ``cfg.TRAIN.CLIP_SENTENCODER`` or the module-level ``CLIP`` flag
            is set.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
    else:
        if split_dir == 'test':
            split_dir = 'valid'
        # Build and load the generator
        if cfg.GAN.B_DCGAN:
            netG = G_DCGAN()
        else:
            netG = G_NET()
        netG.apply(weights_init)
        # cfg.GPU_ID == -1 means "stay on the CPU" throughout this function.
        if cfg.GPU_ID != -1:
            netG.cuda()
        netG.eval()
        #
        model_dir = cfg.TRAIN.NET_G
        state_dict = \
            torch.load(model_dir, map_location=lambda storage, loc: storage)
        netG.load_state_dict(state_dict)
        print('Load G from: ', model_dir)

        # The image encoder checkpoint path is derived from the text encoder
        # path by replacing the file-name stem.
        image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
        img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder')
        print(img_encoder_path)
        print('Load image encoder from:', img_encoder_path)
        state_dict = \
            torch.load(img_encoder_path, map_location=lambda storage, loc: storage)
        image_encoder.load_state_dict(state_dict)
        if cfg.GPU_ID != -1:
            image_encoder = image_encoder.cuda()
        image_encoder.eval()

        print('Load text encoder from:', cfg.TRAIN.NET_E)
        text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
        state_dict = \
            torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
        text_encoder.load_state_dict(state_dict)
        if cfg.GPU_ID != -1:
            text_encoder = text_encoder.cuda()
        text_encoder.eval()

        batch_size = self.batch_size
        nz = cfg.GAN.Z_DIM

        with torch.no_grad():
            noise = Variable(torch.FloatTensor(batch_size, nz))
            if cfg.GPU_ID != -1:
                noise = noise.cuda()

        # the path to save generated images
        save_dir = model_dir[:model_dir.rfind('.pth')]

        cnt = 0  # number of samples seen so far (not read afterwards)

        # new
        if cfg.TRAIN.CLIP_SENTENCODER:
            print("Use CLIP SentEncoder for sampling")

        # key -> numpy feature vector
        img_features = dict()
        txt_features = dict()

        with torch.no_grad():
            for _ in range(1):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
                for step, data in enumerate(self.data_loader, 0):
                    cnt += batch_size
                    if step % 100 == 0:
                        print('step: ', step)

                    imgs, captions, cap_lens, class_ids, keys, texts = prepare_data(
                        data)

                    hidden = text_encoder.init_hidden(batch_size)
                    # words_embs: batch_size x nef x seq_len
                    # sent_emb: batch_size x nef
                    words_embs, sent_emb = text_encoder(
                        captions, cap_lens, hidden)
                    words_embs, sent_emb = words_embs.detach(
                    ), sent_emb.detach()
                    mask = (captions == 0)
                    num_words = words_embs.size(2)
                    if mask.size(1) > num_words:
                        mask = mask[:, :num_words]

                    if cfg.TRAIN.CLIP_SENTENCODER:
                        # random select one paragraph for each training example
                        sents = []
                        for idx in range(len(texts)):
                            sents_per_image = texts[idx].split(
                                '\n')  # new 3/11
                            if len(sents_per_image) > 1:
                                sent_ix = np.random.randint(
                                    0,
                                    len(sents_per_image) - 1)
                            else:
                                sent_ix = 0
                            # NOTE(review): sent_ix is computed but the first
                            # sentence is always appended -- confirm whether
                            # this is intentional.
                            sents.append(sents_per_image[0])
                        # print('sents: ', sents)

                        sent = clip.tokenize(sents)  # .to(device)

                        # load clip
                        #model = torch.jit.load("model.pt").cuda().eval()
                        sent_input = sent
                        if cfg.GPU_ID != -1:
                            sent_input = sent.cuda()
                        # print("text input", sent_input)
                        sent_emb_clip = model.encode_text(
                            sent_input).float()
                        # The RNN sentence embedding is replaced only when the
                        # module-level CLIP flag is set.
                        if CLIP:
                            sent_emb = sent_emb_clip

                    #######################################################
                    # (2) Generate fake images
                    ######################################################
                    noise.data.normal_(0, 1)
                    fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs, mask)

                    if CLIP:
                        # Convert each generated image to PIL and run it
                        # through `preprocess` before model.encode_image.
                        images = []
                        for j in range(fake_imgs[-1].shape[0]):
                            image = fake_imgs[-1][j].cpu().clone()
                            image = image.squeeze(0)
                            unloader = transforms.ToPILImage()
                            image = unloader(image)
                            image = preprocess(
                                image.convert("RGB"))  # 256*256 -> 224*224
                            images.append(image)

                        # Per-channel mean / std applied on top of `preprocess`.
                        # NOTE(review): these .cuda() calls ignore cfg.GPU_ID.
                        image_mean = torch.tensor(
                            [0.48145466, 0.4578275, 0.40821073]).cuda()
                        image_std = torch.tensor(
                            [0.26862954, 0.26130258, 0.27577711]).cuda()

                        image_input = torch.tensor(np.stack(images)).cuda()
                        image_input -= image_mean[:, None, None]
                        image_input /= image_std[:, None, None]
                        cnn_codes = model.encode_image(image_input).float()
                    else:
                        region_features, cnn_codes = image_encoder(
                            fake_imgs[-1])

                    for j in range(batch_size):
                        cnn_code = cnn_codes[j]

                        # Remove every 'b' and every single quote from the key.
                        temp = keys[j].replace('b', '').replace("'", '')
                        img_features[temp] = cnn_code.cpu().numpy()
                        txt_features[temp] = sent_emb[j].cpu().numpy()

        with open(save_dir + ".pkl", 'wb') as f:
            pickle.dump(img_features, f)

        with open(save_dir + "_text.pkl", 'wb') as f:
            pickle.dump(txt_features, f)
def sampling(self, split_dir, model):
    """Generate one image per sample and copy its real image and text file.

    Images are written below ``<NET_G without .pth>/<split_dir>/fake``. When
    ``cfg.TRAIN.CLIP_SENTENCODER`` is set, the sentence embedding comes from
    ``model.encode_text`` on one sentence chosen per sample, and the chosen
    sentences are appended to ``eval_sents.txt`` in the output directory.

    Args:
        split_dir: name of the data split; ``'test'`` is mapped to ``'valid'``.
        model: object providing ``encode_text``; used only when
            ``cfg.TRAIN.CLIP_SENTENCODER`` is set.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
    else:
        if split_dir == 'test':
            split_dir = 'valid'
        # Build and load the generator
        if cfg.GAN.B_DCGAN:
            netG = G_DCGAN()
        else:
            netG = G_NET()
        netG.apply(weights_init)
        # cfg.GPU_ID == -1 means "stay on the CPU" throughout this function.
        if cfg.GPU_ID != -1:
            netG.cuda()
        netG.eval()
        #
        text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
        state_dict = \
            torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
        text_encoder.load_state_dict(state_dict)
        print('Load text encoder from:', cfg.TRAIN.NET_E)
        if cfg.GPU_ID != -1:
            text_encoder = text_encoder.cuda()
        text_encoder.eval()

        batch_size = self.batch_size
        nz = cfg.GAN.Z_DIM

        with torch.no_grad():
            noise = Variable(torch.FloatTensor(batch_size, nz))
            if cfg.GPU_ID != -1:
                noise = noise.cuda()

        model_dir = cfg.TRAIN.NET_G
        state_dict = \
            torch.load(model_dir, map_location=lambda storage, loc: storage)
        # state_dict = torch.load(cfg.TRAIN.NET_G)
        netG.load_state_dict(state_dict)
        print('Load G from: ', model_dir)

        # the path to save generated images
        s_tmp = model_dir[:model_dir.rfind('.pth')]
        save_dir = '%s/%s' % (s_tmp, split_dir)
        mkdir_p(save_dir)

        cnt = 0  # number of samples seen so far (not read afterwards)

        #new
        if cfg.TRAIN.CLIP_SENTENCODER:
            print("Use CLIP SentEncoder for sampling")

        for _ in range(1):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
            for step, data in enumerate(self.data_loader, 0):
                cnt += batch_size
                if step % 100 == 0:
                    print('step: ', step)
                # if step > 50:
                #     break

                #imgs, captions, cap_lens, class_ids, keys = prepare_data(data)
                #new
                imgs, captions, cap_lens, class_ids, keys, texts = prepare_data(
                    data)

                hidden = text_encoder.init_hidden(batch_size)
                # words_embs: batch_size x nef x seq_len
                # sent_emb: batch_size x nef
                words_embs, sent_emb = text_encoder(
                    captions, cap_lens, hidden)
                words_embs, sent_emb = words_embs.detach(
                ), sent_emb.detach()
                mask = (captions == 0)
                num_words = words_embs.size(2)
                if mask.size(1) > num_words:
                    mask = mask[:, :num_words]

                # new
                if cfg.TRAIN.CLIP_SENTENCODER:
                    # random select one paragraph for each training example
                    sents = []
                    for idx in range(len(texts)):
                        sents_per_image = texts[idx].split(
                            '\n')  # new 3/11
                        if len(sents_per_image) > 1:
                            # NOTE(review): np.random.randint excludes the
                            # upper bound, so the last sentence is never
                            # chosen -- confirm this is intended.
                            sent_ix = np.random.randint(
                                0,
                                len(sents_per_image) - 1)
                        else:
                            sent_ix = 0
                        sents.append(sents_per_image[sent_ix])
                        # Keep a record of the sentence used for this sample.
                        with open('%s/%s' % (save_dir, 'eval_sents.txt'),
                                  'a+') as f:
                            f.write(sents_per_image[sent_ix] + '\n')
                    # print('sents: ', sents)

                    sent = clip.tokenize(sents)  # .to(device)

                    # load clip
                    #model = torch.jit.load("model.pt").cuda().eval()
                    sent_input = sent
                    if cfg.GPU_ID != -1:
                        sent_input = sent.cuda()
                    # print("text input", sent_input)
                    # The RNN sentence embedding is replaced here.
                    with torch.no_grad():
                        sent_emb = model.encode_text(sent_input).float()

                #######################################################
                # (2) Generate fake images
                ######################################################
                noise.data.normal_(0, 1)
                fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs, mask)
                for j in range(batch_size):
                    s_tmp = '%s/fake/%s' % (save_dir, keys[j])
                    folder = s_tmp[:s_tmp.rfind('/')]
                    # The real/ and text/ folders are created together with
                    # the first missing fake/ folder.
                    if not os.path.isdir(folder):
                        print('Make a new folder: ', folder)
                        mkdir_p(folder)
                        print('Make a new folder: ', f'{save_dir}/real')
                        mkdir_p(f'{save_dir}/real')
                        print('Make a new folder: ', f'{save_dir}/text')
                        mkdir_p(f'{save_dir}/text')
                    # Only the last entry of fake_imgs is saved.
                    k = -1
                    # for k in range(len(fake_imgs)):
                    im = fake_imgs[k][j].data.cpu().numpy()
                    # [-1, 1] --> [0, 255]
                    im = (im + 1.0) * 127.5
                    im = im.astype(np.uint8)
                    im = np.transpose(im, (1, 2, 0))
                    im = Image.fromarray(im)
                    fullpath = '%s_s%d.png' % (s_tmp, k)
                    im.save(fullpath)
                    # Remove every 'b' and every single quote from the key to
                    # get the source file stem, then copy the matching real
                    # image and text file next to the generated image.
                    temp = keys[j].replace('b', '').replace("'", '')
                    shutil.copy(f"../data/Face/images/{temp}.jpg",
                                f"{save_dir}/real/")
                    shutil.copy(f"../data/Face/text/{temp}.txt",
                                f"{save_dir}/text/")
def genDiscOutputs(self, split_dir, num_samples=57140):
    """Generate images and run them through the discriminator's partial forward.

    The generator weights are read from the ``"netG"`` entry of the
    checkpoint at ``cfg.TRAIN.NET_G``; a ``D_NET256`` is loaded from entry 2
    of its ``"netD"`` list. For each batch, images are generated without
    gradient tracking and handed to ``netD.partial_forward``.

    NOTE: the resulting codes are computed but not stored or returned; the
    method returns ``None``, exactly as before.

    Args:
        split_dir: name of the data split; ``'test'`` is mapped to ``'valid'``.
        num_samples: number of samples to process; at least one batch is
            always run.
    """
    if cfg.TRAIN.NET_G == '':
        logger.error('Error: the path for morels is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    # Build the generator.
    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = G_NET()
    netG.apply(weights_init)
    netG.to(cfg.DEVICE)
    netG.eval()

    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    text_encoder = text_encoder.to(cfg.DEVICE)
    text_encoder.eval()
    logger.info('Loaded text encoder from: %s', cfg.TRAIN.NET_E)

    batch_size = self.batch_size[0]
    nz = cfg.GAN.GLOBAL_Z_DIM
    noise = Variable(torch.FloatTensor(batch_size, nz)).to(cfg.DEVICE)
    local_noise = Variable(
        torch.FloatTensor(batch_size, cfg.GAN.LOCAL_Z_DIM)).to(cfg.DEVICE)

    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict["netG"])
    for key in state_dict:
        print(key)
    logger.info('Load G from: %s', model_dir)

    max_objects = 3
    from model import D_NET256
    netD = D_NET256()
    netD.load_state_dict(state_dict["netD"][2])
    # The discriminator has to live on the same device as its inputs.
    netD.to(cfg.DEVICE)
    netD.eval()

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')].split("/")[-1]
    save_dir = '%s/%s/%s' % ("../output", s_tmp, split_dir)
    mkdir_p(save_dir)
    logger.info("Saving images to: {}".format(save_dir))

    number_batches = max(num_samples // batch_size, 1)

    data_iter = iter(self.data_loader)
    real_labels, fake_labels, match_labels = self.prepare_labels()

    for step in tqdm(range(number_batches)):
        # Builtin next() works on Python 2 and 3; the iterator's .next()
        # method does not exist on Python 3.
        data = next(data_iter)

        imgs, captions, cap_lens, class_ids, keys, transformation_matrices, label_one_hot, _ = prepare_data(
            data, eval=True)

        transf_matrices = transformation_matrices[0]
        transf_matrices_inv = transformation_matrices[1]

        hidden = text_encoder.init_hidden(batch_size)
        # words_embs: batch_size x nef x seq_len
        # sent_emb: batch_size x nef
        words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
        words_embs, sent_emb = words_embs.detach(), sent_emb.detach()
        mask = (captions == 0)
        num_words = words_embs.size(2)
        if mask.size(1) > num_words:
            mask = mask[:, :num_words]

        #######################################################
        # (2) Generate fake images
        ######################################################
        noise.data.normal_(0, 1)
        local_noise.data.normal_(0, 1)
        inputs = (noise, local_noise, sent_emb, words_embs, mask,
                  transf_matrices, transf_matrices_inv, label_one_hot,
                  max_objects)
        # Tensors are moved to the target device; other values pass through.
        inputs = tuple(
            (inp.to(cfg.DEVICE) if isinstance(inp, torch.Tensor) else inp)
            for inp in inputs)

        with torch.no_grad():
            fake_imgs, _, mu, logvar = netG(*inputs)

            inputs = (fake_imgs, fake_labels, transf_matrices,
                      transf_matrices_inv, max_objects)
            # Use the discriminator loaded above; the previous code referred
            # to `netsD`, which is not defined in this function.
            codes = netD.partial_forward(*inputs)
def build_models(self):
    """Build the frozen encoders, the generator and the discriminators.

    The image and text encoders are loaded from ``cfg.TRAIN.NET_E`` (the
    image encoder path is derived by replacing ``'text_encoder'`` with
    ``'image_encoder'``), frozen and put in eval mode. If ``cfg.TRAIN.NET_G``
    is set, the generator is restored from it, the start epoch is parsed from
    the digits between the last ``_`` and the last ``.`` of that path, and
    (when ``cfg.TRAIN.B_NET_D``) discriminators are restored from
    ``netD<i>.pth`` files in the same directory.

    Returns:
        ``[text_encoder, image_encoder, netG, netsD, epoch]``, or ``None``
        when ``cfg.TRAIN.NET_E`` is empty.
    """
    import os  # local import: only used for checkpoint path handling

    def count_parameters(model):
        """Print every trainable parameter's size and return the total."""
        total_param = 0
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            num_param = param.numel()
            if param.dim() > 1:
                print(name, ':', 'x'.join(str(x) for x in list(param.size())), '=', num_param)
            else:
                print(name, ':', num_param)
            total_param += num_param
        return total_param

    # ###################encoders######################################## #
    if cfg.TRAIN.NET_E == '':
        print('Error: no pretrained text-image encoders')
        return

    image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
    img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder')
    state_dict = torch.load(img_encoder_path, map_location=lambda storage, loc: storage)
    image_encoder.load_state_dict(state_dict)
    for p in image_encoder.parameters():
        p.requires_grad = False
    print('Load image encoder from:', img_encoder_path)
    image_encoder.eval()

    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    for p in text_encoder.parameters():
        p.requires_grad = False
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder.eval()

    # #######################generator and discriminators############## #
    netsD = []
    if cfg.GAN.B_DCGAN:
        if cfg.TREE.BRANCH_NUM == 1:
            from model import D_NET64 as D_NET
        elif cfg.TREE.BRANCH_NUM == 2:
            from model import D_NET128 as D_NET
        else:  # cfg.TREE.BRANCH_NUM == 3:
            from model import D_NET256 as D_NET
        # TODO: elif cfg.TREE.BRANCH_NUM > 3:
        netG = G_DCGAN()
        netsD = [D_NET(b_jcu=False)]
    else:
        from model import D_NET64, D_NET128, D_NET256
        netG = G_NET()
        if cfg.TREE.BRANCH_NUM > 0:
            netsD.append(D_NET64())
        if cfg.TREE.BRANCH_NUM > 1:
            netsD.append(D_NET128())
        if cfg.TREE.BRANCH_NUM > 2:
            netsD.append(D_NET256())
        # TODO: if cfg.TREE.BRANCH_NUM > 3:

    print('number of trainable parameters =', count_parameters(netG))
    if netsD:  # BRANCH_NUM == 0 leaves the list empty; avoid IndexError
        print('number of trainable parameters =', count_parameters(netsD[-1]))

    netG.apply(weights_init)
    for net_d in netsD:
        net_d.apply(weights_init)
    print('# of netsD', len(netsD))

    epoch = 0
    if cfg.TRAIN.NET_G != '':
        state_dict = torch.load(cfg.TRAIN.NET_G, map_location=lambda storage, loc: storage)
        netG.load_state_dict(state_dict)
        print('Load G from: ', cfg.TRAIN.NET_G)
        istart = cfg.TRAIN.NET_G.rfind('_') + 1
        iend = cfg.TRAIN.NET_G.rfind('.')
        epoch = int(cfg.TRAIN.NET_G[istart:iend]) + 1
        if cfg.TRAIN.B_NET_D:
            # dirname() copes with a bare file name; slicing at rfind('/')
            # chopped the last character when the path had no '/'.
            ckpt_dir = os.path.dirname(cfg.TRAIN.NET_G)
            for i, net_d in enumerate(netsD):
                Dname = os.path.join(ckpt_dir, 'netD%d.pth' % i)
                print('Load D from: ', Dname)
                state_dict = torch.load(Dname, map_location=lambda storage, loc: storage)
                net_d.load_state_dict(state_dict)

    # ########################################################### #
    if cfg.CUDA:
        text_encoder = text_encoder.cuda()
        image_encoder = image_encoder.cuda()
        netG.cuda()
        for net_d in netsD:
            net_d.cuda()
    return [text_encoder, image_encoder, netG, netsD, epoch]
def build_models(self):
    """Build the frozen encoders, the generator and the discriminators.

    Weights are restored in up to two stages, in this order:

    1. If ``self.resume`` is set, the lexicographically last ``*.pth`` file
       in ``self.model_dir`` is loaded; it must hold ``"netG"`` and a
       ``"netD"`` sequence. The epoch is read from characters ``[-8:-4]`` of
       the file name.
    2. If ``cfg.TRAIN.NET_G`` is set, the generator (and, with
       ``cfg.TRAIN.B_NET_D``, the ``netD<i>.pth`` files next to it) is loaded
       and the epoch is parsed from that path instead.

    Returns:
        ``[text_encoder, image_encoder, netG, netsD, epoch]``, or ``None``
        when ``cfg.TRAIN.NET_E`` is empty.
    """
    import os  # local import: only used for checkpoint path handling

    # ###################encoders######################################## #
    if cfg.TRAIN.NET_E == '':
        print('Error: no pretrained text-image encoders')
        return

    image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
    img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder')
    state_dict = torch.load(img_encoder_path, map_location=lambda storage, loc: storage)
    image_encoder.load_state_dict(state_dict)
    for p in image_encoder.parameters():
        p.requires_grad = False
    print('Load image encoder from:', img_encoder_path)
    image_encoder.eval()

    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    for p in text_encoder.parameters():
        p.requires_grad = False
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder.eval()

    # #######################generator and discriminators############## #
    netsD = []
    from model import D_NET64, D_NET128, D_NET256
    netG = G_NET()
    if cfg.TREE.BRANCH_NUM > 0:
        netsD.append(D_NET64())
    if cfg.TREE.BRANCH_NUM > 1:
        netsD.append(D_NET128())
    if cfg.TREE.BRANCH_NUM > 2:
        netsD.append(D_NET256())

    netG.apply(weights_init)
    for net_d in netsD:
        net_d.apply(weights_init)
    print('# of netsD', len(netsD))

    epoch = 0
    if self.resume:
        # NOTE(review): raises IndexError when model_dir holds no *.pth file.
        checkpoint_list = sorted(glob.glob(self.model_dir + "/" + '*.pth'))
        latest_checkpoint = checkpoint_list[-1]
        state_dict = torch.load(latest_checkpoint, map_location=lambda storage, loc: storage)

        netG.load_state_dict(state_dict["netG"])
        for i, net_d in enumerate(netsD):
            net_d.load_state_dict(state_dict["netD"][i])
        epoch = int(latest_checkpoint[-8:-4]) + 1
        print("Resuming training from checkpoint {} at epoch {}.".format(latest_checkpoint, epoch))

    if cfg.TRAIN.NET_G != '':
        state_dict = torch.load(cfg.TRAIN.NET_G, map_location=lambda storage, loc: storage)
        netG.load_state_dict(state_dict)
        print('Load G from: ', cfg.TRAIN.NET_G)
        istart = cfg.TRAIN.NET_G.rfind('_') + 1
        iend = cfg.TRAIN.NET_G.rfind('.')
        epoch = int(cfg.TRAIN.NET_G[istart:iend]) + 1
        if cfg.TRAIN.B_NET_D:
            # dirname() copes with a bare file name; slicing at rfind('/')
            # chopped the last character when the path had no '/'.
            ckpt_dir = os.path.dirname(cfg.TRAIN.NET_G)
            for i, net_d in enumerate(netsD):
                Dname = os.path.join(ckpt_dir, 'netD%d.pth' % i)
                print('Load D from: ', Dname)
                state_dict = torch.load(Dname, map_location=lambda storage, loc: storage)
                net_d.load_state_dict(state_dict)

    # ########################################################### #
    if cfg.CUDA:
        text_encoder = text_encoder.cuda()
        image_encoder = image_encoder.cuda()
        netG.cuda()
        for net_d in netsD:
            net_d.cuda()
    return [text_encoder, image_encoder, netG, netsD, epoch]
def sampling(self, split_dir, num_samples=30000):
    """Generate images for the data loader's captions and save them as PNGs.

    The text encoder is loaded from ``cfg.TRAIN.NET_E`` and the generator
    from the ``"netG"`` entry of the checkpoint at ``cfg.TRAIN.NET_G``. Only
    the last (highest-index) generator output of each sample is written, to
    ``<checkpoint-stem>/<split_dir>/single/<key>_s-1.png``.

    Args:
        split_dir: Data split name; ``'test'`` is mapped to ``'valid'``.
        num_samples: Approximate number of images to generate. Generation
            stops after ``num_samples // batch_size`` batches (at least one).
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    # Build and load the generator
    netG = G_DCGAN() if cfg.GAN.B_DCGAN else G_NET()
    netG.apply(weights_init)
    netG.cuda()
    netG.eval()

    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    batch_size = self.batch_size
    nz = cfg.GAN.Z_DIM
    noise = Variable(torch.FloatTensor(batch_size, nz))
    noise = noise.cuda()

    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir, map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict["netG"])
    print('Load G from: ', model_dir)

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')]
    save_dir = '%s/%s' % (s_tmp, split_dir)
    mkdir_p(save_dir)

    # The original compared the batch index with ``num_samples`` (a sample
    # count) and never used its running sample counter, so the limit was
    # effectively ignored. Convert the budget to batches instead.
    number_batches = max(num_samples // batch_size, 1)

    for step, data in enumerate(self.data_loader, 0):
        if step % 10000 == 0:
            print('step: ', step)
        if step >= number_batches:
            break

        imgs, captions, cap_lens, class_ids, keys, transformation_matrices, label_one_hot = prepare_data(data)
        transf_matrices_inv = transformation_matrices[1]

        hidden = text_encoder.init_hidden(batch_size)
        # words_embs: batch_size x nef x seq_len
        # sent_emb: batch_size x nef
        words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
        words_embs, sent_emb = words_embs.detach(), sent_emb.detach()
        mask = (captions == 0)
        num_words = words_embs.size(2)
        if mask.size(1) > num_words:
            mask = mask[:, :num_words]

        #######################################################
        # (2) Generate fake images
        ######################################################
        noise.data.normal_(0, 1)
        inputs = (noise, sent_emb, words_embs, mask, transf_matrices_inv, label_one_hot)
        with torch.no_grad():
            fake_imgs, _, mu, logvar = nn.parallel.data_parallel(netG, inputs, self.gpus)

        k = -1  # only the last generator output is saved
        for j in range(batch_size):
            s_tmp = '%s/single/%s' % (save_dir, keys[j])
            folder = s_tmp[:s_tmp.rfind('/')]
            if not os.path.isdir(folder):
                print('Make a new folder: ', folder)
                mkdir_p(folder)
            im = fake_imgs[k][j].data.cpu().numpy()
            # [-1, 1] --> [0, 255]
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            im = Image.fromarray(im)
            fullpath = '%s_s%d.png' % (s_tmp, k)
            im.save(fullpath)
# Evaluation setup: read the config, seed every RNG, restore the generator
# and load the pre-computed text embeddings.
cfg_from_file('./stackGAN_code/cfg/eval_birds.yml')

save_dir = './display'
txt_dir = './embeddings/txt_embedding.t7'

# One randomly drawn seed is shared by Python, torch CPU and torch CUDA.
manualSeed = random.randint(1, 120)
random.seed(manualSeed)
torch.manual_seed(manualSeed)
torch.cuda.manual_seed_all(manualSeed)

# Single-GPU configuration.
gpus = [0]
num_gpus = 1
torch.cuda.set_device(gpus[0])
cudnn.benchmark = True

batch_size = 2

# The checkpoint is loaded after the DataParallel wrap, exactly as in the
# original statement order.
netG = G_NET()
netG.apply(weights_init)
netG = torch.nn.DataParallel(netG, device_ids=gpus)
state_dict = torch.load(cfg.TRAIN.NET_G,
                        map_location=lambda storage, loc: storage)
netG.load_state_dict(state_dict)

nz = cfg.GAN.Z_DIM
noise = Variable(torch.FloatTensor(batch_size, nz))

netG.cuda()
netG.eval()
noise = noise.cuda()

t_embeddings = load_lua(txt_dir)
def build_models(self):
    """Build the frozen encoders, the generator and the discriminators.

    Generator weights may come from two places, applied in this order:
    ``cfg.PRETRAINED_G`` (epoch stays 0) and then ``cfg.TRAIN.NET_G`` (epoch
    is parsed from the digits between the last ``_`` and the last ``.``).
    For either source, when ``cfg.TRAIN.B_NET_D`` is set the discriminators
    are restored from ``netD<i>.pth`` files in the same directory.

    Returns:
        ``[text_encoder, image_encoder, netG, netsD, epoch]``, or ``None``
        when ``cfg.TRAIN.NET_E`` is empty.
    """
    import os  # local import: only used for checkpoint path handling

    # ###################encoders######################################## #
    if cfg.TRAIN.NET_E == '':
        print('Error: no pretrained text-image encoders')
        return

    image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
    img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder')
    state_dict = torch.load(img_encoder_path, map_location=lambda storage, loc: storage)
    image_encoder.load_state_dict(state_dict)
    for p in image_encoder.parameters():
        p.requires_grad = False
    print('Load image encoder from:', img_encoder_path)
    image_encoder.eval()

    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    for p in text_encoder.parameters():
        p.requires_grad = False
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder.eval()

    # #######################generator and discriminators############## #
    netsD = []
    if cfg.GAN.B_DCGAN:
        if cfg.TREE.BRANCH_NUM == 1:
            from model import D_NET64 as D_NET
        elif cfg.TREE.BRANCH_NUM == 2:
            from model import D_NET128 as D_NET
        else:  # cfg.TREE.BRANCH_NUM == 3:
            from model import D_NET256 as D_NET
        # TODO: elif cfg.TREE.BRANCH_NUM > 3:
        netG = G_DCGAN()
        netsD = [D_NET(b_jcu=False)]
    else:
        from model import D_NET64, D_NET128, D_NET256
        netG = G_NET()
        if cfg.TREE.BRANCH_NUM > 0:
            netsD.append(D_NET64())
        if cfg.TREE.BRANCH_NUM > 1:
            netsD.append(D_NET128())
        if cfg.TREE.BRANCH_NUM > 2:
            netsD.append(D_NET256())
        # TODO: if cfg.TREE.BRANCH_NUM > 3:

    netG.apply(weights_init)
    for net_d in netsD:
        net_d.apply(weights_init)
    print('# of netsD', len(netsD))

    def load_discriminators(generator_path):
        # Discriminator files sit next to the generator checkpoint and differ
        # only in their index. dirname() copes with a bare file name; slicing
        # at rfind('/') chopped the last character when there was no '/'.
        ckpt_dir = os.path.dirname(generator_path)
        for i, net_d in enumerate(netsD):
            Dname = os.path.join(ckpt_dir, 'netD%d.pth' % i)
            print('Load D from: ', Dname)
            d_state = torch.load(Dname, map_location=lambda storage, loc: storage)
            net_d.load_state_dict(d_state)

    epoch = 0
    # MODIFIED
    if cfg.PRETRAINED_G != '':
        state_dict = torch.load(cfg.PRETRAINED_G, map_location=lambda storage, loc: storage)
        netG.load_state_dict(state_dict)
        print('Load G from: ', cfg.PRETRAINED_G)
        if cfg.TRAIN.B_NET_D:
            load_discriminators(cfg.PRETRAINED_G)

    if cfg.TRAIN.NET_G != '':
        state_dict = torch.load(cfg.TRAIN.NET_G, map_location=lambda storage, loc: storage)
        netG.load_state_dict(state_dict)
        print('Load G from: ', cfg.TRAIN.NET_G)
        istart = cfg.TRAIN.NET_G.rfind('_') + 1
        iend = cfg.TRAIN.NET_G.rfind('.')
        epoch = int(cfg.TRAIN.NET_G[istart:iend]) + 1
        if cfg.TRAIN.B_NET_D:
            load_discriminators(cfg.TRAIN.NET_G)

    # ########################################################### #
    if cfg.CUDA:
        text_encoder = text_encoder.cuda()
        image_encoder = image_encoder.cuda()
        netG.cuda()
        for net_d in netsD:
            net_d.cuda()
    return [text_encoder, image_encoder, netG, netsD, epoch]
def evaluate(self, split_dir):
    """Generate images for every text embedding yielded by the data loader.

    The generator is restored from ``cfg.TRAIN.NET_G``; output goes to an
    ``iteration<N>`` directory beside that checkpoint, where ``N`` is parsed
    from the digits between the last ``_`` and the last ``.`` of the path.
    With ``cfg.TEST.B_EXAMPLE`` the index-2 generator outputs are collected
    and saved as one super-image per batch; otherwise the last generator
    output is saved as single images for each embedding.

    Args:
        split_dir: Data split name; ``'test'`` is mapped to ``'valid'``.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
        return

    # Build and load the generator
    if split_dir == 'test':
        split_dir = 'valid'
    netG = G_NET()
    netG.apply(weights_init)
    netG = torch.nn.DataParallel(netG, device_ids=self.gpus)
    print(netG)
    # Load the checkpoint the config points at. The original loaded a
    # hard-coded Colab path here while logging and deriving save_dir from
    # cfg.TRAIN.NET_G, so the reported model could differ from the one used.
    state_dict = torch.load(cfg.TRAIN.NET_G,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load ', cfg.TRAIN.NET_G)

    # the path to save generated images
    s_tmp = cfg.TRAIN.NET_G
    istart = s_tmp.rfind('_') + 1
    iend = s_tmp.rfind('.')
    iteration = int(s_tmp[istart:iend])
    s_tmp = s_tmp[:s_tmp.rfind('/')]
    save_dir = '%s/iteration%d' % (s_tmp, iteration)

    nz = cfg.GAN.Z_DIM
    noise = Variable(torch.FloatTensor(self.batch_size, nz))
    if cfg.CUDA:
        netG.cuda()
        noise = noise.cuda()

    # switch to evaluate mode
    netG.eval()
    for step, data in enumerate(self.data_loader, 0):
        imgs, t_embeddings, filenames = data
        if cfg.CUDA:
            t_embeddings = Variable(t_embeddings).cuda()
        else:
            t_embeddings = Variable(t_embeddings)

        embedding_dim = t_embeddings.size(1)
        # The batch size is taken from the first image tensor so the noise
        # always matches the current batch.
        batch_size = imgs[0].size(0)
        noise.data.resize_(batch_size, nz)
        noise.data.normal_(0, 1)

        fake_img_list = []
        for i in range(embedding_dim):
            fake_imgs, _, _ = netG(noise, t_embeddings[:, i, :])
            if cfg.TEST.B_EXAMPLE:
                fake_img_list.append(fake_imgs[2].data.cpu())
            else:
                self.save_singleimages(fake_imgs[-1], filenames,
                                       save_dir, split_dir, i, 256)
        if cfg.TEST.B_EXAMPLE:
            self.save_superimages(fake_img_list, filenames,
                                  save_dir, split_dir, 256)
def gen_example(n_words, wordtoix, ixtoword, model_dir):
    '''Generate images from the example sentences in ``caption.txt``.

    Each non-empty line of ``caption.txt`` is tokenized on ``\\w+`` and mapped
    through ``wordtoix`` (unknown words are dropped). For every caption the
    generator outputs are written to ``results/<i>_<caption-index>_g<k>.png``
    and the attention visualisations to ``..._a<k>_attention.png``.

    Args:
        n_words: Vocabulary size passed to the text encoder.
        wordtoix: Mapping from word to vocabulary index.
        ixtoword: Mapping from vocabulary index to word (used for the
            attention visualisation).
        model_dir: Path of the generator state dict to load.

    Raises:
        ValueError: If ``caption.txt`` yields no usable caption.
    '''
    filepath = 'caption.txt'
    data_dic = {}
    with open(filepath, "r") as f:
        filenames = f.read().split('\n')

    # One tokenizer serves all sentences; the original rebuilt it per line.
    tokenizer = RegexpTokenizer(r'\w+')
    captions = []
    cap_lens = []
    for sent in filenames:
        if len(sent) == 0:
            continue
        sent = sent.replace("\ufffd\ufffd", " ")
        tokens = tokenizer.tokenize(sent.lower())
        if len(tokens) == 0:
            print('sentence token == 0 !')
            continue

        rev = []
        for t in tokens:
            t = t.encode('ascii', 'ignore').decode('ascii')
            if len(t) > 0 and t in wordtoix:
                rev.append(wordtoix[t])
        captions.append(rev)
        cap_lens.append(len(rev))

    if not captions:
        # np.max() on an empty list raises an opaque ValueError; say why.
        raise ValueError('no usable captions found in %s' % filepath)

    # Captions are ordered longest-first; sorted_indices maps each row back
    # to its position in the input file.
    max_len = np.max(cap_lens)
    sorted_indices = np.argsort(cap_lens)[::-1]
    cap_lens = np.asarray(cap_lens)
    cap_lens = cap_lens[sorted_indices]
    cap_array = np.zeros((len(captions), max_len), dtype='int64')
    for i in range(len(captions)):
        idx = sorted_indices[i]
        cap = captions[idx]
        c_len = len(cap)
        cap_array[i, :c_len] = cap
    key = 0
    data_dic[key] = [cap_array, cap_lens, sorted_indices]

    text_encoder = RNN_ENCODER(n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder.eval()

    netG = G_NET()
    netG.apply(weights_init)
    netG.eval()
    state_dict = torch.load(model_dir, map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)

    save_dir = 'results'
    mkdir_p(save_dir)
    for key in data_dic:
        captions, cap_lens, sorted_indices = data_dic[key]

        batch_size = captions.shape[0]
        nz = cfg.GAN.Z_DIM

        with torch.no_grad():
            captions = Variable(torch.from_numpy(captions))
            cap_lens = Variable(torch.from_numpy(cap_lens))

        # ``image_per_caption`` is not defined in this function; it is
        # presumably a module-level setting -- TODO confirm.
        for i in range(image_per_caption):
            # Pure inference: keep both forward passes out of autograd.
            with torch.no_grad():
                noise = Variable(torch.FloatTensor(batch_size, nz))
                # (1) Extract text embeddings
                hidden = text_encoder.init_hidden(batch_size)
                words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
                mask = (captions == 0)
                # (2) Generate fake images
                noise.data.normal_(0, 1)
                fake_imgs, attention_maps, _, _ = netG(noise, sent_emb, words_embs, mask)

            cap_lens_np = cap_lens.data.numpy()
            for j in range(batch_size):
                save_name = '%s/%d_%d' % (save_dir, i, sorted_indices[j])
                for k in range(len(fake_imgs)):
                    im = fake_imgs[k][j].data.cpu().numpy()
                    # [-1, 1] --> [0, 255]
                    im = (im + 1.0) * 127.5
                    im = im.astype(np.uint8)
                    im = np.transpose(im, (1, 2, 0))
                    im = Image.fromarray(im)
                    fullpath = '%s_g%d.png' % (save_name, k)
                    im.save(fullpath)

                for k in range(len(attention_maps)):
                    # Attention map k is drawn over generator output k + 1
                    # when more than one output exists.
                    if len(fake_imgs) > 1:
                        im = fake_imgs[k + 1]
                    else:
                        im = fake_imgs[0]
                    attn_maps = attention_maps[k]
                    att_sze = attn_maps.size(2)
                    img_set, sentences = \
                        build_super_images2(im[j].unsqueeze(0),
                                            captions[j].unsqueeze(0),
                                            [cap_lens_np[j]], ixtoword,
                                            [attn_maps[j]], att_sze)
                    if img_set is not None:
                        im = Image.fromarray(img_set)
                        fullpath = '%s_a%d_attention.png' % (save_name, k)
                        im.save(fullpath)
def sampling(self, split_dir, num_samples=30000):
    """Generate images for the data loader's captions and save them as PNGs.

    The text encoder is loaded from ``cfg.TRAIN.NET_E`` and the generator
    from the ``"netG"`` entry of the checkpoint at ``cfg.TRAIN.NET_G``.
    ``num_samples // batch_size`` batches (at least one) are drawn from the
    loader; only the last generator output of each sample is written, to
    ``../output/<checkpoint-stem>/<split_dir>/<key>_s<global-index>.png``.

    Args:
        split_dir: Data split name; ``'test'`` is mapped to ``'valid'``.
        num_samples: Approximate number of images to generate.

    Raises:
        StopIteration: If the data loader yields fewer batches than needed.
    """
    if cfg.TRAIN.NET_G == '':
        logger.error('Error: the path for morels is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    # Build and load the generator
    netG = G_DCGAN() if cfg.GAN.B_DCGAN else G_NET()
    netG.apply(weights_init)
    netG.to(cfg.DEVICE)
    netG.eval()

    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    text_encoder = text_encoder.to(cfg.DEVICE)
    text_encoder.eval()
    logger.info('Loaded text encoder from: %s', cfg.TRAIN.NET_E)

    batch_size = self.batch_size[0]
    nz = cfg.GAN.GLOBAL_Z_DIM
    noise = Variable(torch.FloatTensor(batch_size, nz)).to(cfg.DEVICE)
    local_noise = Variable(torch.FloatTensor(batch_size, cfg.GAN.LOCAL_Z_DIM)).to(cfg.DEVICE)

    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir, map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict["netG"])
    max_objects = 10
    logger.info('Load G from: %s', model_dir)

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')].split("/")[-1]
    save_dir = '%s/%s/%s' % ("../output", s_tmp, split_dir)
    mkdir_p(save_dir)
    logger.info("Saving images to: {}".format(save_dir))

    number_batches = max(num_samples // batch_size, 1)

    data_iter = iter(self.data_loader)

    for step in tqdm(range(number_batches)):
        # Built-in next() works on every iterator; the .next() method is a
        # Python 2 leftover that newer DataLoader iterators do not provide.
        data = next(data_iter)

        imgs, captions, cap_lens, class_ids, keys, transformation_matrices, label_one_hot, _ = prepare_data(
            data, eval=True)

        transf_matrices = transformation_matrices[0]
        transf_matrices_inv = transformation_matrices[1]

        hidden = text_encoder.init_hidden(batch_size)
        # words_embs: batch_size x nef x seq_len
        # sent_emb: batch_size x nef
        words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
        words_embs, sent_emb = words_embs.detach(), sent_emb.detach()
        mask = (captions == 0)
        num_words = words_embs.size(2)
        if mask.size(1) > num_words:
            mask = mask[:, :num_words]

        #######################################################
        # (2) Generate fake images
        ######################################################
        noise.data.normal_(0, 1)
        local_noise.data.normal_(0, 1)
        inputs = (noise, local_noise, sent_emb, words_embs, mask, transf_matrices,
                  transf_matrices_inv, label_one_hot, max_objects)
        inputs = tuple((inp.to(cfg.DEVICE) if isinstance(inp, torch.Tensor) else inp)
                       for inp in inputs)

        with torch.no_grad():
            fake_imgs, _, mu, logvar = netG(*inputs)

        k = -1  # only the last generator output is saved
        for j in range(batch_size):
            s_tmp = '%s/%s' % (save_dir, keys[j])
            folder = s_tmp[:s_tmp.rfind('/')]
            if not os.path.isdir(folder):
                logger.info('Make a new folder: %s', folder)
                mkdir_p(folder)
            im = fake_imgs[k][j].data.cpu().numpy()
            # [-1, 1] --> [0, 255]
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            im = Image.fromarray(im)
            # The suffix is the sample's running index over all batches.
            fullpath = '%s_s%d.png' % (s_tmp, step * batch_size + j)
            im.save(fullpath)
def build_models(self):
    """Build the frozen encoders, the caption models, the generator and the
    discriminators.

    The text/image encoders come from ``cfg.TRAIN.NET_E`` and the caption
    CNN/RNN from ``cfg.CAP.caption_cnn_path`` / ``cfg.CAP.caption_rnn_path``;
    all four have ``requires_grad`` disabled. If ``cfg.TRAIN.NET_G`` is set,
    the generator is restored from it, the start epoch is parsed from the
    digits between the last ``_`` and the last ``.`` of that path, and (when
    ``cfg.TRAIN.B_NET_D``) discriminators are restored from ``netD<i>.pth``
    files in the same directory.

    Returns:
        ``[text_encoder, image_encoder, caption_cnn, caption_rnn, netG,
        netsD, epoch]``, or ``None`` when ``cfg.TRAIN.NET_E`` is empty.
    """
    import os  # local import: only used for checkpoint path handling

    # text encoders
    if cfg.TRAIN.NET_E == '':
        print('Error: no pretrained text-image encoders')
        return

    image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
    img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder')
    state_dict = torch.load(img_encoder_path, map_location=lambda storage, loc: storage)
    image_encoder.load_state_dict(state_dict)
    for p in image_encoder.parameters():
        p.requires_grad = False
    print('Load image encoder from:', img_encoder_path)
    image_encoder.eval()

    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    for p in text_encoder.parameters():
        p.requires_grad = False
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder.eval()

    # Caption models - cnn_encoder and rnn_decoder
    caption_cnn = CAPTION_CNN(cfg.CAP.embed_size)
    caption_cnn.load_state_dict(
        torch.load(cfg.CAP.caption_cnn_path, map_location=lambda storage, loc: storage))
    for p in caption_cnn.parameters():
        p.requires_grad = False
    print('Load caption model from:', cfg.CAP.caption_cnn_path)
    caption_cnn.eval()

    caption_rnn = CAPTION_RNN(cfg.CAP.embed_size, cfg.CAP.hidden_size * 2,
                              self.n_words, cfg.CAP.num_layers)
    caption_rnn.load_state_dict(
        torch.load(cfg.CAP.caption_rnn_path, map_location=lambda storage, loc: storage))
    for p in caption_rnn.parameters():
        p.requires_grad = False
    print('Load caption model from:', cfg.CAP.caption_rnn_path)
    # NOTE(review): unlike the other frozen models, caption_rnn is not put
    # in eval mode here; kept as in the original -- confirm it is intended.

    # Generator and Discriminator:
    netsD = []
    if cfg.GAN.B_DCGAN:
        if cfg.TREE.BRANCH_NUM == 1:
            from model import D_NET64 as D_NET
        elif cfg.TREE.BRANCH_NUM == 2:
            from model import D_NET128 as D_NET
        else:  # cfg.TREE.BRANCH_NUM == 3:
            from model import D_NET256 as D_NET
        netG = G_DCGAN()
        netsD = [D_NET(b_jcu=False)]
    else:
        from model import D_NET64, D_NET128, D_NET256
        netG = G_NET()
        if cfg.TREE.BRANCH_NUM > 0:
            netsD.append(D_NET64())
        if cfg.TREE.BRANCH_NUM > 1:
            netsD.append(D_NET128())
        if cfg.TREE.BRANCH_NUM > 2:
            netsD.append(D_NET256())

    netG.apply(weights_init)
    for net_d in netsD:
        net_d.apply(weights_init)
    print('# of netsD', len(netsD))

    epoch = 0
    if cfg.TRAIN.NET_G != '':
        state_dict = torch.load(cfg.TRAIN.NET_G, map_location=lambda storage, loc: storage)
        netG.load_state_dict(state_dict)
        print('Load G from: ', cfg.TRAIN.NET_G)
        istart = cfg.TRAIN.NET_G.rfind('_') + 1
        iend = cfg.TRAIN.NET_G.rfind('.')
        epoch = int(cfg.TRAIN.NET_G[istart:iend]) + 1
        if cfg.TRAIN.B_NET_D:
            # dirname() copes with a bare file name; slicing at rfind('/')
            # chopped the last character when the path had no '/'.
            ckpt_dir = os.path.dirname(cfg.TRAIN.NET_G)
            for i, net_d in enumerate(netsD):
                Dname = os.path.join(ckpt_dir, 'netD%d.pth' % i)
                print('Load D from: ', Dname)
                state_dict = torch.load(Dname, map_location=lambda storage, loc: storage)
                net_d.load_state_dict(state_dict)

    if cfg.CUDA:
        text_encoder = text_encoder.cuda()
        image_encoder = image_encoder.cuda()
        caption_cnn = caption_cnn.cuda()
        caption_rnn = caption_rnn.cuda()
        netG.cuda()
        for net_d in netsD:
            net_d.cuda()
    return [text_encoder, image_encoder, caption_cnn, caption_rnn, netG, netsD, epoch]
def sample(self, split_dir, num_samples=25, draw_bbox=False):
    """Save comparison strips: one real image followed by nine generations.

    For each of the first ``num_samples`` batches, the first sample's
    caption, layout and labels are repeated nine times with fresh noise. The
    real image and the nine last-stage generator outputs are written as a
    single 10-column image named ``<caption>_<step>.png`` in
    ``<checkpoint-stem>_<split_dir>``.

    Args:
        split_dir: Data split name; ``'test'`` is mapped to ``'valid'``.
        num_samples: Maximum number of batches (one strip each) to process.
        draw_bbox: If true, outline up to three bounding boxes of the first
            sample on all ten images.
    """
    # ``cPickle`` (Python 2 only, and unused here) was imported in the
    # original and made this method fail on Python 3; it is dropped along
    # with the other unused local imports.
    import torchvision.utils as vutils

    if cfg.TRAIN.NET_G == '':
        print('Error: the path for model NET_G is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    # Build and load the generator
    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    batch_size = cfg.TRAIN.BATCH_SIZE
    nz = cfg.GAN.Z_DIM

    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir, map_location=lambda storage, loc: storage)
    netG = G_NET()
    print('Load G from: ', model_dir)
    netG.apply(weights_init)
    netG.load_state_dict(state_dict["netG"])
    netG.cuda()
    netG.eval()

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')]
    save_dir = '%s_%s' % (s_tmp, split_dir)
    mkdir_p(save_dir)

    noise = Variable(torch.FloatTensor(9, nz))
    imsize = 256
    num_saved = 0

    for step, data in enumerate(self.data_loader, 0):
        if step >= num_samples:
            break

        imgs, captions, cap_lens, class_ids, keys, transformation_matrices, label_one_hot, bbox = \
            prepare_data(data, eval=True)
        # Only the first sample of the batch is visualised.
        transf_matrices_inv = transformation_matrices[1][0].unsqueeze(0)
        label_one_hot = label_one_hot[0].unsqueeze(0)

        img = imgs[-1][0]
        val_image = img.view(1, 3, imsize, imsize)

        hidden = text_encoder.init_hidden(batch_size)
        # words_embs: batch_size x nef x seq_len
        # sent_emb: batch_size x nef
        words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
        words_embs = words_embs[0].unsqueeze(0).detach()
        sent_emb = sent_emb[0].unsqueeze(0).detach()
        words_embs = words_embs.repeat(9, 1, 1)
        sent_emb = sent_emb.repeat(9, 1)
        mask = (captions == 0)
        mask = mask[0].unsqueeze(0)
        num_words = words_embs.size(2)
        if mask.size(1) > num_words:
            mask = mask[:, :num_words]
        mask = mask.repeat(9, 1)
        transf_matrices_inv = transf_matrices_inv.repeat(9, 1, 1, 1)
        label_one_hot = label_one_hot.repeat(9, 1, 1)

        #######################################################
        # (2) Generate fake images
        ######################################################
        noise.data.normal_(0, 1)
        inputs = (noise, sent_emb, words_embs, mask, transf_matrices_inv, label_one_hot)
        with torch.no_grad():
            fake_imgs, _, mu, logvar = nn.parallel.data_parallel(netG, inputs, self.gpus)

        data_img = torch.FloatTensor(10, 3, imsize, imsize).fill_(0)
        data_img[0] = val_image
        data_img[1:10] = fake_imgs[-1]

        if draw_bbox:
            for idx in range(3):
                x, y, w, h = tuple([int(imsize * v) for v in bbox[0, idx]])
                w = min(w, imsize - 1)
                h = min(h, imsize - 1)
                if x <= -1:
                    break
                # Clamp the far edges so a box touching the border cannot
                # index past the image (the slices already clip themselves).
                right = min(x + w, imsize - 1)
                bottom = min(y + h, imsize - 1)
                data_img[:10, :, y, x:x + w] = 1
                data_img[:10, :, y:y + h, x] = 1
                data_img[:10, :, bottom, x:x + w] = 1
                data_img[:10, :, y:y + h, right] = 1

        # get caption: index 0 marks the end of the caption
        cap = captions[0].data.cpu().numpy()
        words = []
        for token in cap:
            if token == 0:
                break
            words.append(self.ixtoword[token].encode('ascii', 'ignore').decode('ascii'))
        sentence = " ".join(words)

        vutils.save_image(data_img, '{}/{}_{}.png'.format(save_dir, sentence, step),
                          normalize=True, nrow=10)
        num_saved += 1

    # Report the number of files written; the original printed the last loop
    # index, which is one short when the loader runs out first and undefined
    # when the loader is empty.
    print("Saved {} files to {}".format(num_saved, save_dir))
def evaluate(self, split_dir, save_dir="D:\\results", n_samples=50):
    """Generate several images per text embedding and optionally keep the
    ones the Inception model scores highest on its bird classes.

    For every batch and every embedding index ``i``, ``n_samples`` images are
    generated from individual noise rows. Each image is scored by the maximum
    Inception output over a fixed set of bird class indices; the top five
    ``(sample_index, score)`` pairs per embedding are kept. With
    ``cfg.TEST.B_EXAMPLE`` a super-image is saved per batch (restricted to
    the top-scored images when ``cfg.TEST.FILTER``); otherwise every
    generated image is saved individually.

    Args:
        split_dir: Data split name; ``'test'`` is mapped to ``'valid'``.
        save_dir: Output directory. The default keeps the path that was
            previously hard-coded in the body.
        n_samples: Number of noise vectors (images) per embedding. The
            default keeps the previously hard-coded value.

    NOTE(review): the FID / inception-score computation that used to follow
    the generation loop was already commented out; the bookkeeping that only
    fed it (prediction lists and a per-step ``os.listdir`` of the CUB image
    directory) has been removed.
    """
    inception_model = INCEPTION_V3()
    if cfg.CUDA:
        inception_model.cuda()
    inception_model.eval()

    if cfg.TRAIN.NET_G == '':
        print('Error: the path for models is not found!')
        return

    # Indices of the Inception outputs treated as bird classes. Hoisted: the
    # original rebuilt this constant list for every generated image.
    bird_indices = [
        7, 8, 9, 10, 11, 13, 15, 16, 17, 18, 19, 21, 23, 81, 84, 85, 86,
        88, 90, 91, 93, 94, 95, 96, 97, 99, 129, 130, 133, 134, 135, 138,
        141, 142, 143, 144, 146, 517
    ]

    # Build and load the generator
    if split_dir == 'test':
        split_dir = 'valid'
    netG = G_NET()
    netG.apply(weights_init)
    netG = torch.nn.DataParallel(netG, device_ids=self.gpus)
    state_dict = torch.load(cfg.TRAIN.NET_G,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load ', cfg.TRAIN.NET_G)

    nz = cfg.GAN.Z_DIM
    noise = Variable(torch.FloatTensor(n_samples, nz))
    if cfg.CUDA:
        netG.cuda()
        noise = noise.cuda()

    # switch to evaluate mode
    netG.eval()
    for step, data in enumerate(tqdm(self.data_loader)):
        imgs, t_embeddings, filenames = data
        if cfg.CUDA:
            t_embeddings = Variable(t_embeddings).cuda()
        else:
            t_embeddings = Variable(t_embeddings)

        embedding_dim = t_embeddings.size(1)
        noise.data.normal_(0, 1)

        fake_img_list = []
        score_list = []
        for i in range(embedding_dim):
            scores_i = []
            t_embeddings_i = t_embeddings[:, i, :]
            for j in range(n_samples):
                noise_j = noise[j].unsqueeze(0)
                fake_imgs, _, _ = netG(noise_j, t_embeddings_i)

                # Score the image so reasonable-looking ones can be selected.
                pred = inception_model(fake_imgs[-1].detach())
                pred = pred.data.cpu().numpy()
                score = np.max(pred[0, bird_indices])
                scores_i.append((j, score))

                if cfg.TEST.B_EXAMPLE:
                    fake_img_list.append(fake_imgs[2].data.cpu())
                else:
                    self.save_singleimages(fake_imgs[-1], filenames, j,
                                           save_dir, split_dir, i, 256)

            # Keep the five best-scoring samples for this embedding.
            score_list.append(
                sorted(scores_i, key=lambda x: x[1], reverse=True)[:5])

        if cfg.TEST.B_EXAMPLE:
            if cfg.TEST.FILTER:
                # fake_img_list is laid out embedding-major, n_samples per
                # embedding, so (i, sample j) sits at i * n_samples + j.
                images_to_save = [
                    fake_img_list[i * n_samples + k[0]]
                    for i, j in enumerate(score_list) for k in j
                ]
            else:
                images_to_save = fake_img_list
            self.save_superimages(images_to_save, filenames, save_dir,
                                  split_dir, 256)
def gen_example(self, data_dic):
    """Generate images for the given captions and save them as PNG files.

    Loads the text encoder from ``cfg.TRAIN.NET_E`` and the generator from
    ``cfg.TRAIN.NET_G``.  For every entry of ``data_dic`` one PNG is written
    per generator output (``*_g<k>.png``) and, when ``build_super_images2``
    returns an image, one PNG per attention map (``*_a<k>.png``).  Files go
    to ``<cfg.TRAIN.NET_G without '.pth'>/<key>/``.

    Args:
        data_dic: mapping from a name (used as output sub-directory) to a
            ``(captions, cap_lens, sorted_indices)`` triple.  ``captions``
            and ``cap_lens`` are NumPy arrays (they are passed through
            ``torch.from_numpy``); ``sorted_indices[j]`` is embedded in the
            file name of sample ``j``.

    Returns:
        None.  When ``cfg.TRAIN.NET_G`` is empty an error is printed and
        nothing else happens.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for models is not found!')
        return

    # Build and load the text encoder.
    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    # Build and load the generator.
    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = G_NET()
    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)
    netG.cuda()
    netG.eval()

    # Root of the output tree: the checkpoint path without its '.pth' suffix.
    s_tmp = model_dir[:model_dir.rfind('.pth')]
    nz = cfg.GAN.Z_DIM

    # Pure inference: ``torch.no_grad()`` replaces the removed
    # ``Variable(..., volatile=True)`` and also covers the text encoder, so
    # no autograd graph is kept alive for any forward pass below.
    with torch.no_grad():
        for key, (captions, cap_lens, sorted_indices) in data_dic.items():
            save_dir = '%s/%s' % (s_tmp, key)
            mkdir_p(save_dir)

            batch_size = captions.shape[0]
            captions = torch.from_numpy(captions).cuda()
            cap_lens = torch.from_numpy(cap_lens).cuda()
            cap_lens_np = cap_lens.cpu().numpy()

            for i in range(1):  # 16
                noise = torch.FloatTensor(batch_size, nz).cuda()

                #######################################################
                # (1) Extract text embeddings
                ######################################################
                hidden = text_encoder.init_hidden(batch_size)
                # words_embs: batch_size x nef x seq_len
                # sent_emb: batch_size x nef
                words_embs, sent_emb = text_encoder(captions, cap_lens,
                                                    hidden)
                mask = (captions == 0)

                #######################################################
                # (2) Generate fake images
                ######################################################
                noise.normal_(0, 1)
                # NOTE(review): `sampling` truncates `mask` to the number of
                # encoded words and also passes `cap_lens` to netG; this call
                # does neither.  Confirm against the generator's forward().
                fake_imgs, attention_maps, _, _ = netG(noise, sent_emb,
                                                       words_embs, mask)

                # Move the images used for attention visualisation to the
                # CPU once per batch instead of once per sample.
                attn_inputs = []
                for k, attn_maps in enumerate(attention_maps):
                    if len(fake_imgs) > 1:
                        src = fake_imgs[k + 1]
                    else:
                        src = fake_imgs[0]
                    attn_inputs.append(
                        (src.cpu(), attn_maps, attn_maps.size(2)))

                for j in range(batch_size):
                    save_name = '%s/%d_s_%d' % (save_dir, i,
                                                sorted_indices[j])

                    # One PNG per generator output.
                    for k in range(len(fake_imgs)):
                        im = fake_imgs[k][j].cpu().numpy()
                        # [-1, 1] --> [0, 255]
                        im = (im + 1.0) * 127.5
                        im = im.astype(np.uint8)
                        # channel-first --> channel-last for PIL
                        im = np.transpose(im, (1, 2, 0))
                        im = Image.fromarray(im)
                        fullpath = '%s_g%d.png' % (save_name, k)
                        im.save(fullpath)

                    # One PNG per attention map.
                    for k, (src_cpu, attn_maps, att_sze) in \
                            enumerate(attn_inputs):
                        img_set, sentences = build_super_images2(
                            src_cpu[j].unsqueeze(0),
                            captions[j].unsqueeze(0),
                            [cap_lens_np[j]], self.ixtoword,
                            [attn_maps[j]], att_sze)
                        if img_set is not None:
                            im = Image.fromarray(img_set)
                            fullpath = '%s_a%d.png' % (save_name, k)
                            im.save(fullpath)
def sampling(self, split_dir):
    """Generate images for a data split and print a retrieval score.

    Loads the generator (``cfg.TRAIN.NET_G``), the text encoder
    (``cfg.TRAIN.NET_E``) and the image encoder (same path with
    ``'text_encoder'`` replaced by ``'image_encoder'``).  It then iterates
    over ``self.data_loader`` up to 11 times.  For every batch it

    * writes the last generator output of each sample to
      ``<NET_G without '.pth'>/<split_dir>/single/<key>_s-1_<pass>.png``;
    * encodes the generated images and, per sample, compares the image code
      by cosine similarity with the sample's own sentence embedding plus the
      sentence embeddings of the captions returned by
      ``self.dataset.get_mis_caption`` (presumably mismatched captions --
      verify against the dataset class).  A hit is recorded when the own
      caption scores highest.

    After 30000 samples the hit vector is shuffled, split into 10 equal
    parts, and the mean and standard deviation of the per-part hit rates are
    printed; generation then stops.

    Args:
        split_dir: name of the split; ``'test'`` is mapped to ``'valid'``.
            Used as the output sub-directory.

    Returns:
        None.  When ``cfg.TRAIN.NET_G`` is empty an error is printed and
        nothing else happens.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for models is not found!')
        return

    if split_dir == 'test':
        split_dir = 'valid'

    # Build the generator (weights are loaded further below).
    if cfg.GAN.B_DCGAN:
        netG = G_DCGAN()
    else:
        netG = G_NET()
    netG.apply(weights_init)
    netG.cuda()
    netG.eval()

    # load text encoder
    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    # load image encoder
    image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
    img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder',
                                               'image_encoder')
    state_dict = torch.load(img_encoder_path,
                            map_location=lambda storage, loc: storage)
    image_encoder.load_state_dict(state_dict)
    print('Load image encoder from:', img_encoder_path)
    image_encoder = image_encoder.cuda()
    image_encoder.eval()

    batch_size = self.batch_size
    nz = cfg.GAN.Z_DIM
    # Reused noise buffer; refilled in place for every batch.
    noise = torch.FloatTensor(batch_size, nz).cuda()

    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)

    # the path to save generated images
    s_tmp = model_dir[:model_dir.rfind('.pth')]
    save_dir = '%s/%s' % (s_tmp, split_dir)
    mkdir_p(save_dir)

    n_eval = 30000                   # number of samples scored in total
    n_splits = 10                    # parts used for the mean/std estimate
    split_size = n_eval // n_splits
    n_other_captions = 99            # hidden-state batch for get_mis_caption

    cnt = 0
    R_count = 0
    R = np.zeros(n_eval)             # R[t] == 1 when sample t was a hit
    cont = True

    # Pure inference: ``torch.no_grad()`` replaces the removed
    # ``Variable(..., volatile=True)`` so that no autograd graph is built.
    with torch.no_grad():
        for ii in range(11):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
            if not cont:
                break
            for step, data in enumerate(self.data_loader, 0):
                cnt += batch_size
                if step % 100 == 0:
                    print('cnt: ', cnt)

                imgs, captions, cap_lens, class_ids, keys = \
                    prepare_data(data)

                #######################################################
                # (1) Extract text embeddings
                ######################################################
                hidden = text_encoder.init_hidden(batch_size)
                # words_embs: batch_size x nef x seq_len
                # sent_emb: batch_size x nef
                words_embs, sent_emb = text_encoder(captions, cap_lens,
                                                    hidden)
                mask = (captions == 0)
                num_words = words_embs.size(2)
                if mask.size(1) > num_words:
                    mask = mask[:, :num_words]

                #######################################################
                # (2) Generate fake images
                ######################################################
                noise.normal_(0, 1)
                fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs,
                                          mask, cap_lens)

                # Save the last generator output of every sample.
                k = -1
                for j in range(batch_size):
                    img_prefix = '%s/single/%s' % (save_dir, keys[j])
                    folder = img_prefix[:img_prefix.rfind('/')]
                    if not os.path.isdir(folder):
                        mkdir_p(folder)
                    im = fake_imgs[k][j].cpu().numpy()
                    # [-1, 1] --> [0, 255]
                    im = (im + 1.0) * 127.5
                    im = im.astype(np.uint8)
                    im = np.transpose(im, (1, 2, 0))
                    im = Image.fromarray(im)
                    fullpath = '%s_s%d_%d.png' % (img_prefix, k, ii)
                    im.save(fullpath)

                #######################################################
                # (3) Score generated images against captions
                ######################################################
                _, cnn_code = image_encoder(fake_imgs[-1])

                for i in range(batch_size):
                    if R_count >= n_eval:
                        # R is full; writing further would index past its
                        # end when batch_size does not divide n_eval.
                        break
                    mis_captions, mis_captions_len = \
                        self.dataset.get_mis_caption(class_ids[i])
                    hidden = text_encoder.init_hidden(n_other_captions)
                    _, sent_emb_t = text_encoder(mis_captions,
                                                 mis_captions_len, hidden)
                    # Row 0 is the sample's own caption embedding.
                    rnn_code = torch.cat(
                        (sent_emb[i, :].unsqueeze(0), sent_emb_t), 0)
                    ### cnn_code = 1 * nef
                    ### rnn_code = 100 * nef
                    img_code = cnn_code[i].unsqueeze(0)
                    scores = torch.mm(img_code,
                                      rnn_code.transpose(0, 1))  # 1 * 100
                    cnn_code_norm = torch.norm(img_code, 2, dim=1,
                                               keepdim=True)
                    rnn_code_norm = torch.norm(rnn_code, 2, dim=1,
                                               keepdim=True)
                    norm = torch.mm(cnn_code_norm,
                                    rnn_code_norm.transpose(0, 1))
                    # Cosine similarity, guarded against zero norms.
                    scores0 = scores / norm.clamp(min=1e-8)
                    if torch.argmax(scores0) == 0:
                        R[R_count] = 1
                    R_count += 1

                if R_count >= n_eval:
                    np.random.shuffle(R)
                    # Every part holds exactly split_size samples.  The
                    # previous slice end `(i + 1) * 3000 - 1` dropped the
                    # last sample of each part, so figures may differ very
                    # slightly from earlier runs.
                    split_means = R.reshape(n_splits,
                                            split_size).mean(axis=1)
                    R_mean = np.average(split_means)
                    R_std = np.std(split_means)
                    print("R mean:{:.4f} std:{:.4f}".format(R_mean, R_std))
                    cont = False
                    break