def build_models(): # build model ############################################################ text_encoder = RNN_ENCODER(dataset.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM) image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM) labels = Variable(torch.LongTensor(range(batch_size))) start_epoch = 0 if cfg.TRAIN.NET_E != '': state_dict = torch.load(cfg.TRAIN.NET_E) text_encoder.load_state_dict(state_dict) print('Load ', cfg.TRAIN.NET_E) # name = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder') state_dict = torch.load(name) image_encoder.load_state_dict(state_dict) print('Load ', name) istart = cfg.TRAIN.NET_E.rfind('_') + 8 iend = cfg.TRAIN.NET_E.rfind('.') start_epoch = cfg.TRAIN.NET_E[istart:iend] start_epoch = int(start_epoch) + 1 print('start_epoch', start_epoch) if cfg.CUDA: text_encoder = text_encoder.to('cuda:0') image_encoder = image_encoder.to('cuda:0') labels = labels.to('cuda:0') return text_encoder, image_encoder, labels, start_epoch
def build_models(self): # ###################encoders######################################## # if cfg.TRAIN.NET_E == '': raise Exception('Error: no pretrained text encoder') image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM) img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder') state_dict = torch.load(img_encoder_path, map_location=lambda storage, loc: storage) image_encoder.load_state_dict(state_dict) for p in image_encoder.parameters(): p.requires_grad = False logger.info('Load image encoder from: %s', img_encoder_path) image_encoder.eval() text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM) state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage) text_encoder.load_state_dict(state_dict) for p in text_encoder.parameters(): p.requires_grad = False logger.info('Load text encoder from: %s', cfg.TRAIN.NET_E) text_encoder.eval() # #######################generator and discriminators############## # netsD = [] from model import D_NET64, D_NET128, D_NET256 netG = G_NET() if cfg.TREE.BRANCH_NUM > 0: netsD.append(D_NET64()) if cfg.TREE.BRANCH_NUM > 1: netsD.append(D_NET128()) if cfg.TREE.BRANCH_NUM > 2: netsD.append(D_NET256()) netG.apply(weights_init) for i in range(len(netsD)): netsD[i].apply(weights_init) logger.info('# of params in netG: %s' % count_learnable_params(netG)) logger.info('# of netsD: %s', len(netsD)) logger.info('# of params in netsD: %s' % [count_learnable_params(netD) for netD in netsD]) epoch = 0 if self.resume: checkpoint_list = sorted([ckpt for ckpt in glob.glob(self.model_dir + "/" + '*.pth')]) latest_checkpoint = checkpoint_list[-1] state_dict = torch.load(latest_checkpoint, map_location=lambda storage, loc: storage) netG.load_state_dict(state_dict["netG"]) for i in range(len(netsD)): netsD[i].load_state_dict(state_dict["netD"][i]) epoch = int(latest_checkpoint[-8:-4]) + 1 logger.info("Resuming training from checkpoint {} at epoch {}.".format(latest_checkpoint, epoch)) # if cfg.TRAIN.NET_G != '': state_dict = torch.load(cfg.TRAIN.NET_G, map_location=lambda storage, loc: storage) netG.load_state_dict(state_dict) logger.info('Load G from: %s', cfg.TRAIN.NET_G) istart = cfg.TRAIN.NET_G.rfind('_') + 1 iend = cfg.TRAIN.NET_G.rfind('.') epoch = cfg.TRAIN.NET_G[istart:iend] epoch = int(epoch) + 1 if cfg.TRAIN.B_NET_D: Gname = cfg.TRAIN.NET_G for i in range(len(netsD)): s_tmp = Gname[:Gname.rfind('/')] Dname = '%s/netD%d.pth' % (s_tmp, i) logger.info('Load D from: %s', Dname) state_dict = torch.load(Dname, map_location=lambda storage, loc: storage) netsD[i].load_state_dict(state_dict) # ########################################################### # if cfg.CUDA: text_encoder.to(cfg.DEVICE) image_encoder.to(cfg.DEVICE) netG.to(cfg.DEVICE) if self.n_gpu > 1: netG = DataParallelPassThrough(netG, ) for i in range(len(netsD)): netsD[i].to(cfg.DEVICE) if self.n_gpu > 1: netsD[i] = DataParallelPassThrough(netsD[i], ) return [text_encoder, image_encoder, netG, netsD, epoch]
def sampling(self, split_dir, num_samples=30000): if cfg.TRAIN.NET_G == '': logger.error('Error: the path for morels is not found!') else: if split_dir == 'test': split_dir = 'valid' # Build and load the generator if cfg.GAN.B_DCGAN: netG = G_DCGAN() else: netG = G_NET() netG.apply(weights_init) netG.to(cfg.DEVICE) netG.eval() # text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM) state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage) text_encoder.load_state_dict(state_dict) text_encoder = text_encoder.to(cfg.DEVICE) text_encoder.eval() logger.info('Loaded text encoder from: %s', cfg.TRAIN.NET_E) batch_size = self.batch_size[0] nz = cfg.GAN.GLOBAL_Z_DIM noise = Variable(torch.FloatTensor(batch_size, nz)).to(cfg.DEVICE) local_noise = Variable(torch.FloatTensor(batch_size, cfg.GAN.LOCAL_Z_DIM)).to(cfg.DEVICE) model_dir = cfg.TRAIN.NET_G state_dict = torch.load(model_dir, map_location=lambda storage, loc: storage) netG.load_state_dict(state_dict["netG"]) max_objects = 10 logger.info('Load G from: %s', model_dir) # the path to save generated images s_tmp = model_dir[:model_dir.rfind('.pth')].split("/")[-1] save_dir = '%s/%s/%s' % ("../output", s_tmp, split_dir) mkdir_p(save_dir) logger.info("Saving images to: {}".format(save_dir)) number_batches = num_samples // batch_size if number_batches < 1: number_batches = 1 data_iter = iter(self.data_loader) for step in tqdm(range(number_batches)): data = data_iter.next() imgs, captions, cap_lens, class_ids, keys, transformation_matrices, label_one_hot, _ = prepare_data( data, eval=True) transf_matrices = transformation_matrices[0] transf_matrices_inv = transformation_matrices[1] hidden = text_encoder.init_hidden(batch_size) # words_embs: batch_size x nef x seq_len # sent_emb: batch_size x nef words_embs, sent_emb = text_encoder(captions, cap_lens, hidden) words_embs, sent_emb = words_embs.detach(), sent_emb.detach() mask = (captions == 0) num_words = words_embs.size(2) if mask.size(1) > num_words: mask = mask[:, :num_words] ####################################################### # (2) Generate fake images ###################################################### noise.data.normal_(0, 1) local_noise.data.normal_(0, 1) inputs = (noise, local_noise, sent_emb, words_embs, mask, transf_matrices, transf_matrices_inv, label_one_hot, max_objects) inputs = tuple((inp.to(cfg.DEVICE) if isinstance(inp, torch.Tensor) else inp) for inp in inputs) with torch.no_grad(): fake_imgs, _, mu, logvar = netG(*inputs) for batch_idx, j in enumerate(range(batch_size)): s_tmp = '%s/%s' % (save_dir, keys[j]) folder = s_tmp[:s_tmp.rfind('/')] if not os.path.isdir(folder): logger.info('Make a new folder: %s', folder) mkdir_p(folder) k = -1 # for k in range(len(fake_imgs)): im = fake_imgs[k][j].data.cpu().numpy() # [-1, 1] --> [0, 255] im = (im + 1.0) * 127.5 im = im.astype(np.uint8) im = np.transpose(im, (1, 2, 0)) im = Image.fromarray(im) fullpath = '%s_s%d.png' % (s_tmp, step*batch_size+batch_idx) im.save(fullpath)
def genDiscOutputs(self, split_dir, num_samples=57140): if cfg.TRAIN.NET_G == '': logger.error('Error: the path for morels is not found!') else: if split_dir == 'test': split_dir = 'valid' # Build and load the generator if cfg.GAN.B_DCGAN: netG = G_DCGAN() else: netG = G_NET() netG.apply(weights_init) netG.to(cfg.DEVICE) netG.eval() # text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM) ###HACK state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage) text_encoder.load_state_dict(state_dict) text_encoder = text_encoder.to(cfg.DEVICE) text_encoder.eval() logger.info('Loaded text encoder from: %s', cfg.TRAIN.NET_E) batch_size = self.batch_size[0] nz = cfg.GAN.GLOBAL_Z_DIM noise = Variable(torch.FloatTensor(batch_size, nz)).to(cfg.DEVICE) local_noise = Variable( torch.FloatTensor(batch_size, cfg.GAN.LOCAL_Z_DIM)).to(cfg.DEVICE) model_dir = cfg.TRAIN.NET_G state_dict = torch.load(model_dir, map_location=lambda storage, loc: storage) netG.load_state_dict(state_dict["netG"]) for keys in state_dict.keys(): print(keys) logger.info('Load G from: %s', model_dir) max_objects = 3 from model import D_NET256 netD = D_NET256() netD.load_state_dict(state_dict["netD"][2]) netD.eval() # the path to save generated images s_tmp = model_dir[:model_dir.rfind('.pth')].split("/")[-1] save_dir = '%s/%s/%s' % ("../output", s_tmp, split_dir) mkdir_p(save_dir) logger.info("Saving images to: {}".format(save_dir)) number_batches = num_samples // batch_size if number_batches < 1: number_batches = 1 data_iter = iter(self.data_loader) real_labels, fake_labels, match_labels = self.prepare_labels() for step in tqdm(range(number_batches)): data = data_iter.next() imgs, captions, cap_lens, class_ids, keys, transformation_matrices, label_one_hot, _ = prepare_data( data, eval=True) transf_matrices = transformation_matrices[0] transf_matrices_inv = transformation_matrices[1] hidden = text_encoder.init_hidden(batch_size) # words_embs: batch_size x nef x seq_len # sent_emb: batch_size x nef words_embs, sent_emb = text_encoder(captions, cap_lens, hidden) words_embs, sent_emb = words_embs.detach(), sent_emb.detach() mask = (captions == 0) num_words = words_embs.size(2) if mask.size(1) > num_words: mask = mask[:, :num_words] ####################################################### # (2) Generate fake images ###################################################### noise.data.normal_(0, 1) local_noise.data.normal_(0, 1) inputs = (noise, local_noise, sent_emb, words_embs, mask, transf_matrices, transf_matrices_inv, label_one_hot, max_objects) inputs = tuple( (inp.to(cfg.DEVICE) if isinstance(inp, torch.Tensor ) else inp) for inp in inputs) with torch.no_grad(): fake_imgs, _, mu, logvar = netG(*inputs) inputs = (fake_imgs, fake_labels, transf_matrices, transf_matrices_inv, max_objects) codes = netsD[-1].partial_forward(*inputs)
def build_models(self): # ###################encoders######################################## # if cfg.TRAIN.NET_E == '': print('Error: no pretrained text-image encoders') return image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM) img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder') state_dict = \ torch.load(img_encoder_path, map_location=lambda storage, loc: storage) image_encoder.load_state_dict(state_dict) for p in image_encoder.parameters(): p.requires_grad = False print('Load image encoder from:', img_encoder_path) image_encoder.eval() text_encoder = \ RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM) state_dict = \ torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage) text_encoder.load_state_dict(state_dict) for p in text_encoder.parameters(): p.requires_grad = False print('Load text encoder from:', cfg.TRAIN.NET_E) text_encoder.eval() # #######################generator and discriminators############## # netG = G_NET(len(self.cats_index_dict)) netsPatD, netsShpD = [], [] if cfg.TREE.BRANCH_NUM > 0: netsPatD.append(PAT_D_NET64()) netsShpD.append(SHP_D_NET64(len(self.cats_index_dict))) if cfg.TREE.BRANCH_NUM > 1: netsPatD.append(PAT_D_NET128()) netsShpD.append(SHP_D_NET128(len(self.cats_index_dict))) if cfg.TREE.BRANCH_NUM > 2: netsPatD.append(PAT_D_NET256()) netsShpD.append(SHP_D_NET256(len(self.cats_index_dict))) netObjSSD = OBJ_SS_D_NET(len(self.cats_index_dict)) netObjLSD = OBJ_LS_D_NET(len(self.cats_index_dict)) netG.apply(weights_init) netObjSSD.apply(weights_init) netObjLSD.apply(weights_init) for i in range(len(netsPatD)): netsPatD[i].apply(weights_init) netsShpD[i].apply(weights_init) print('# of netsPatD', len(netsPatD)) # ########################################################### # if cfg.CUDA: text_encoder = text_encoder.cuda() image_encoder = image_encoder.cuda() netG.cuda() netObjSSD.cuda() netObjLSD.cuda() for i in range(len(netsPatD)): netsPatD[i].cuda() netsShpD[i].cuda() if len(cfg.GPU_IDS) > 1: text_encoder = nn.DataParallel(text_encoder) text_encoder.to(self.device) image_encoder = nn.DataParallel(image_encoder) image_encoder.to(self.device) netG = nn.DataParallel(netG) netG.to(self.device) netObjSSD = nn.DataParallel(netObjSSD) netObjSSD.to(self.device) netObjLSD = nn.DataParallel(netObjLSD) netObjLSD.to(self.device) for i in range(len(netsPatD)): netsPatD[i] = nn.DataParallel(netsPatD[i]) netsPatD[i].to(self.device) netsShpD[i] = nn.DataParallel(netsShpD[i]) netsShpD[i].to(self.device) # epoch = 0 if cfg.TRAIN.NET_G != '': state_dict = \ torch.load(cfg.TRAIN.NET_G, map_location=lambda storage, loc: storage) netG.load_state_dict(state_dict) print('Load G from: ', cfg.TRAIN.NET_G) istart = cfg.TRAIN.NET_G.rfind('_') + 1 iend = cfg.TRAIN.NET_G.rfind('.') epoch = cfg.TRAIN.NET_G[istart:iend] epoch = int(epoch) + 1 Gname = cfg.TRAIN.NET_G for i in range(len(netsPatD)): s_tmp = Gname[:Gname.rfind('/')] Dname = '%s/netPatD%d.pth' % (s_tmp, i) print('Load PatD from: ', Dname) state_dict = \ torch.load(Dname, map_location=lambda storage, loc: storage) netsPatD[i].load_state_dict(state_dict) Dname = '%s/netShpD%d.pth' % (s_tmp, i) print('Load ShpD from: ', Dname) state_dict = \ torch.load(Dname, map_location=lambda storage, loc: storage) netsShpD[i].load_state_dict(state_dict) s_tmp = Gname[:Gname.rfind('/')] Dname = '%s/netObjSSD.pth' % (s_tmp) print('Load ObjSSD from: ', Dname) state_dict = \ torch.load(Dname, map_location=lambda storage, loc: storage) netObjSSD.load_state_dict(state_dict) s_tmp = Gname[:Gname.rfind('/')] Dname = '%s/netObjLSD.pth' % (s_tmp) print('Load ObjLSD from: ', Dname) state_dict = \ torch.load(Dname, map_location=lambda storage, loc: storage) netObjLSD.load_state_dict(state_dict) return [ text_encoder, image_encoder, netG, netsPatD, netsShpD, netObjSSD, netObjLSD, epoch ]
def build_models(self): # ############################## encoders ############################# # text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM) state_dict = \ torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage) text_encoder.load_state_dict(state_dict) print('Load text encoder from:', cfg.TRAIN.NET_E) text_encoder.eval() image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM) img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder') state_dict = \ torch.load(img_encoder_path, map_location=lambda storage, loc: storage) image_encoder.load_state_dict(state_dict) for p in image_encoder.parameters(): p.requires_grad = False print('Load image encoder from:', img_encoder_path) image_encoder.eval() # ########### image generator and (potential) shape generator ########## # netG = G_NET(len(self.cats_index_dict)) netG.apply(weights_init) netG.eval() netShpG = None if cfg.TEST.USE_GT_BOX_SEG > 0: netShpG = SHP_G_NET(len(self.cats_index_dict)) netShpG.apply(weights_init) netShpG.eval() # ################### parallization and initialization ################## # if cfg.CUDA: text_encoder.cuda() image_encoder.cuda() netG.cuda() if cfg.TEST.USE_GT_BOX_SEG > 0: netShpG.cuda() if len(cfg.GPU_IDS) > 1: text_encoder = nn.DataParallel(text_encoder) text_encoder.to(self.device) image_encoder = nn.DataParallel(image_encoder) image_encoder.to(self.device) netG = nn.DataParallel(netG) netG.to(self.device) if cfg.TEST.USE_GT_BOX_SEG > 0: netShpG = nn.DataParallel(netShpG) netShpG.to(self.device) state_dict = torch.load(cfg.TRAIN.NET_G, map_location=lambda storage, loc: storage) netG.load_state_dict(state_dict) print('Load G from: ', cfg.TRAIN.NET_G) if cfg.TEST.USE_GT_BOX_SEG > 0: state_dict = torch.load(cfg.TEST.NET_SHP_G, map_location=lambda storage, loc: storage) netShpG.load_state_dict(state_dict) print('Load Shape G from: ', cfg.TEST.NET_SHP_G) return [text_encoder, image_encoder, netG, netShpG]
def build_models(self): # ###################encoders######################################## # if cfg.TRAIN.NET_E == '': print('Error: no pretrained text-image encoders') return image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM) img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder') state_dict = \ torch.load(img_encoder_path, map_location=lambda storage, loc: storage) image_encoder.load_state_dict(state_dict) for p in image_encoder.parameters(): p.requires_grad = False print('Load image encoder from:', img_encoder_path) image_encoder.eval() text_encoder = \ RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM) state_dict = \ torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage) text_encoder.load_state_dict(state_dict) for p in text_encoder.parameters(): p.requires_grad = False print('Load text encoder from:', cfg.TRAIN.NET_E) text_encoder.eval() # #######################generator and discriminators############## # netsD = [] if cfg.GAN.B_DCGAN: if cfg.TREE.BRANCH_NUM == 1: from model import D_NET64 as D_NET elif cfg.TREE.BRANCH_NUM == 2: from model import D_NET128 as D_NET else: # cfg.TREE.BRANCH_NUM == 3: from model import D_NET256 as D_NET # TODO: elif cfg.TREE.BRANCH_NUM > 3: netG = G_DCGAN() netsD = [D_NET(b_jcu=False)] else: from model import D_NET64, D_NET128, D_NET256 netG = G_NET() if cfg.TREE.BRANCH_NUM > 0: netsD.append(D_NET64()) if cfg.TREE.BRANCH_NUM > 1: netsD.append(D_NET128()) if cfg.TREE.BRANCH_NUM > 2: netsD.append(D_NET256()) # TODO: if cfg.TREE.BRANCH_NUM > 3: netG.apply(weights_init) # print(netG) for i in range(len(netsD)): netsD[i].apply(weights_init) # print(netsD[i]) print('# of netsD', len(netsD)) # epoch = 0 if cfg.TRAIN.NET_G != '': state_dict = \ torch.load(cfg.TRAIN.NET_G, map_location=lambda storage, loc: storage) netG.load_state_dict(state_dict) print('Load G from: ', cfg.TRAIN.NET_G) istart = cfg.TRAIN.NET_G.rfind('_') + 1 iend = cfg.TRAIN.NET_G.rfind('.') epoch = cfg.TRAIN.NET_G[istart:iend] epoch = int(epoch) + 1 if cfg.TRAIN.B_NET_D: Gname = cfg.TRAIN.NET_G for i in range(len(netsD)): s_tmp = Gname[:Gname.rfind('/')] Dname = '%s/netD%d.pth' % (s_tmp, i) print('Load D from: ', Dname) state_dict = \ torch.load(Dname, map_location=lambda storage, loc: storage) netsD[i].load_state_dict(state_dict) # ########################################################### # if torch.cuda.device_count() > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs text_encoder = nn.DataParallel(text_encoder) image_encoder = nn.DataParallel(image_encoder) netG = nn.DataParallel(netG) for i in range(len(netsD)): netsD[i] = nn.DataParallel(netsD[i]) image_encoder.to(self.device) text_encoder.to(self.device) netG.to(self.device) for i in range(len(netsD)): netsD[i].to(self.device) # if cfg.CUDA and torch.cuda.is_available(): # text_encoder = text_encoder.cuda() # image_encoder = image_encoder.cuda() # netG.cuda() # for i in range(len(netsD)): # netsD[i].cuda() # if cfg.PARALLEL: # netG = torch.nn.DataParallel(netG, device_ids=[0, 1, 2]) # text_encoder = torch.nn.DataParallel(text_encoder, device_ids=[0, 1, 2]) # image_encoder = torch.nn.DataParallel(image_encoder, device_ids=[0, 1, 2]) # for i in range(len(netsD)): # netsD[i] = torch.nn.DataParallel(netsD[i], device_ids=[0, 1, 2]) return [text_encoder, image_encoder, netG, netsD, epoch]
def gen_example(self, data_dic): if cfg.TRAIN.NET_G == '': print('Error: the path for morels is not found!') else: # Build and load the generator text_encoder = \ RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM) state_dict = \ torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage) text_encoder.load_state_dict(state_dict) print('Load text encoder from:', cfg.TRAIN.NET_E) text_encoder = text_encoder.to(cfg.DEVICE) text_encoder.eval() # the path to save generated images if cfg.GAN.B_DCGAN: netG = G_DCGAN() else: netG = G_NET() s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')] model_dir = cfg.TRAIN.NET_G state_dict = \ torch.load(model_dir, map_location=lambda storage, loc: storage) netG.load_state_dict(state_dict) print('Load G from: ', model_dir) netG.to(cfg.DEVICE) netG.eval() for key in data_dic: save_dir = '%s/%s' % (s_tmp, key) mkdir_p(save_dir) captions, cap_lens, sorted_indices = data_dic[key] batch_size = captions.shape[0] nz = cfg.GAN.Z_DIM captions = torch.from_numpy(captions) cap_lens = torch.from_numpy(cap_lens) captions = captions.to(cfg.DEVICE) cap_lens = cap_lens.to(cfg.DEVICE) for i in range(1): # 16 noise = torch.FloatTensor(batch_size, nz) noise = noise.to(cfg.DEVICE) # (1) Extract text embeddings hidden = text_encoder.init_hidden(batch_size) # words_embs: batch_size x nef x seq_len # sent_emb: batch_size x nef words_embs, sent_emb = text_encoder( captions, cap_lens, hidden) mask = (captions == 0) # (2) Generate fake images noise.data.normal_(0, 1) fake_imgs, attention_maps, _, _ = netG( noise, sent_emb, words_embs, mask) # G attention cap_lens_np = cap_lens.cpu().data.numpy() for j in range(batch_size): save_name = '%s/%d_s_%d' % (save_dir, i, sorted_indices[j]) for k in range(len(fake_imgs)): im = fake_imgs[k][j].data.cpu().numpy() im = (im + 1.0) * 127.5 im = im.astype(np.uint8) # print('im', im.shape) im = np.transpose(im, (1, 2, 0)) # print('im', im.shape) im = Image.fromarray(im) fullpath = '%s_g%d.png' % (save_name, k) im.save(fullpath) for k in range(len(attention_maps)): if len(fake_imgs) > 1: im = fake_imgs[k + 1].detach().cpu() else: im = fake_imgs[0].detach().cpu() attn_maps = attention_maps[k] att_sze = attn_maps.size(2) img_set, sentences = \ build_super_images2(im[j].unsqueeze(0), captions[j].unsqueeze(0), [cap_lens_np[j]], self.ixtoword, [attn_maps[j]], att_sze) if img_set is not None: im = Image.fromarray(img_set) fullpath = '%s_a%d.png' % (save_name, k) im.save(fullpath)
def build_models(self): print('Building models...') print('N_words: ', self.n_words) ##################### ## TEXT ENCODERS ## ##################### if cfg.TRAIN.NET_E == '': print('Error: no pretrained text-image encoders') return image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM) img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder') state_dict = \ torch.load(img_encoder_path, map_location=lambda storage, loc: storage) image_encoder.load_state_dict(state_dict) print('Built image encoder: ', image_encoder) for p in image_encoder.parameters(): p.requires_grad = False print('Load image encoder from:', img_encoder_path) image_encoder.eval() text_encoder = \ RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM) state_dict = \ torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage) text_encoder.load_state_dict(state_dict) print('Built text encoder: ', text_encoder) for p in text_encoder.parameters(): p.requires_grad = False print('Load text encoder from:', cfg.TRAIN.NET_E) text_encoder.eval() ###################### ## CAPTION MODELS ## ###################### # cnn_encoder and rnn_encoder if cfg.CAP.USE_ORIGINAL: caption_cnn = CAPTION_CNN(embed_size=cfg.TEXT.EMBEDDING_DIM) caption_rnn = CAPTION_RNN(embed_size=cfg.TEXT.EMBEDDING_DIM, hidden_size=cfg.CAP.HIDDEN_SIZE, vocab_size=self.n_words, num_layers=cfg.CAP.NUM_LAYERS) else: caption_cnn = Encoder() caption_rnn = Decoder(idx2word=self.ixtoword) caption_cnn_checkpoint = torch.load( cfg.CAP.CAPTION_CNN_PATH, map_location=lambda storage, loc: storage) caption_rnn_checkpoint = torch.load( cfg.CAP.CAPTION_RNN_PATH, map_location=lambda storage, loc: storage) caption_cnn.load_state_dict(caption_cnn_checkpoint['model_state_dict']) caption_rnn.load_state_dict(caption_rnn_checkpoint['model_state_dict']) for p in caption_cnn.parameters(): p.requires_grad = False print('Load caption model from: ', cfg.CAP.CAPTION_CNN_PATH) caption_cnn.eval() for p in caption_rnn.parameters(): p.requires_grad = False print('Load caption model from: ', cfg.CAP.CAPTION_RNN_PATH) ################################# ## GENERATOR & DISCRIMINATOR ## ################################# netsD = [] if cfg.GAN.B_DCGAN: if cfg.TREE.BRANCH_NUM == 1: from model import D_NET64 as D_NET elif cfg.TREE.BRANCH_NUM == 2: from model import D_NET128 as D_NET else: # cfg.TREE.BRANCH_NUM == 3: from model import D_NET256 as D_NET netG = G_DCGAN() netsD = [D_NET(b_jcu=False)] else: from model import D_NET64, D_NET128, D_NET256 netG = G_NET() if cfg.TREE.BRANCH_NUM > 0: netsD.append(D_NET64()) if cfg.TREE.BRANCH_NUM > 1: netsD.append(D_NET128()) if cfg.TREE.BRANCH_NUM > 2: netsD.append(D_NET256()) netG.apply(weights_init) # print(netG) for i in range(len(netsD)): netsD[i].apply(weights_init) # print(netsD[i]) print('# of netsD', len(netsD)) epoch = 0 if cfg.TRAIN.NET_G != '': state_dict = \ torch.load(cfg.TRAIN.NET_G, map_location=lambda storage, loc: storage) netG.load_state_dict(state_dict) print('Load G from: ', cfg.TRAIN.NET_G) istart = cfg.TRAIN.NET_G.rfind('_') + 1 iend = cfg.TRAIN.NET_G.rfind('.') epoch = cfg.TRAIN.NET_G[istart:iend] epoch = int(epoch) + 1 if cfg.TRAIN.B_NET_D: Gname = cfg.TRAIN.NET_G for i in range(len(netsD)): s_tmp = Gname[:Gname.rfind('/')] Dname = '%s/netD%d.pth' % (s_tmp, i) print('Load D from: ', Dname) state_dict = \ torch.load(Dname, map_location=lambda storage, loc: storage) netsD[i].load_state_dict(state_dict) text_encoder = text_encoder.to(cfg.DEVICE) image_encoder = image_encoder.to(cfg.DEVICE) caption_cnn = caption_cnn.to(cfg.DEVICE) caption_rnn = caption_rnn.to(cfg.DEVICE) netG.to(cfg.DEVICE) for i in range(len(netsD)): netsD[i].to(cfg.DEVICE) return [ text_encoder, image_encoder, caption_cnn, caption_rnn, netG, netsD, epoch ]
def sampling(self, split_dir): if cfg.TRAIN.NET_G == '': print('Error: the path for model is not found!') else: if split_dir == 'test': split_dir = 'valid' # Build and load the generator if cfg.GAN.B_DCGAN: netG = G_DCGAN() else: netG = G_NET() netG.apply(weights_init) netG.to(cfg.DEVICE) netG.eval() # text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM) state_dict = \ torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage) text_encoder.load_state_dict(state_dict) print('Load text encoder from:', cfg.TRAIN.NET_E) text_encoder = text_encoder.to(cfg.DEVICE) text_encoder.eval() batch_size = self.batch_size nz = cfg.GAN.Z_DIM noise = torch.FloatTensor(batch_size, nz) noise = noise.to(cfg.DEVICE) model_dir = cfg.TRAIN.NET_G state_dict = \ torch.load(model_dir, map_location=lambda storage, loc: storage) netG.load_state_dict(state_dict) print('Load G from: ', model_dir) # the path to save generated images s_tmp = model_dir[:model_dir.rfind('.pth')] save_dir = '%s/%s' % (s_tmp, split_dir) mkdir_p(save_dir) cnt = 0 for _ in range(1): # (cfg.TEXT.CAPTIONS_PER_IMAGE): for step, data in enumerate(self.data_loader, 0): cnt += batch_size if step % 100 == 0: print('step: ', step) # if step > 50: # break imgs, captions, cap_lens, class_ids, keys = prepare_data( data) hidden = text_encoder.init_hidden(batch_size) # words_embs: batch_size x nef x seq_len # sent_emb: batch_size x nef words_embs, sent_emb = text_encoder( captions, cap_lens, hidden) words_embs, sent_emb = words_embs.detach( ), sent_emb.detach() mask = (captions == 0) num_words = words_embs.size(2) if mask.size(1) > num_words: mask = mask[:, :num_words] # (2) Generate fake images noise.data.normal_(0, 1) fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs, mask) for j in range(batch_size): s_tmp = '%s/single/%s' % (save_dir, keys[j]) folder = s_tmp[:s_tmp.rfind('/')] if not os.path.isdir(folder): print('Make a new folder: ', folder) mkdir_p(folder) k = -1 # for k in range(len(fake_imgs)): im = fake_imgs[k][j].data.cpu().numpy() # [-1, 1] --> [0, 255] im = (im + 1.0) * 127.5 im = im.astype(np.uint8) im = np.transpose(im, (1, 2, 0)) im = Image.fromarray(im) fullpath = '%s_s%d.png' % (s_tmp, k) im.save(fullpath)