def save_super_images(sample_batchs, hr_sample_batchs, captions_batch,
                      batch_size, startID, save_dir):
    if not os.path.isdir(save_dir):
        print('Make a new folder: ', save_dir)
        mkdir_p(save_dir)

    img_shape = hr_sample_batchs[0][0].shape
    for j in range(batch_size):
        # Skip captions without any alphabetic characters
        if not re.search('[a-zA-Z]+', captions_batch[j]):
            continue

        padding = np.ones(img_shape) * 255  # NOTE: unused in this variant
        row = []
        for i in range(np.minimum(8, len(sample_batchs))):
            lr_img = sample_batchs[i][j]
            hr_img = hr_sample_batchs[i][j]
            hr_img = (hr_img + 1.0) * 127.5
            # NOTE: the resized low-res sample is computed but never used here
            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
            row.append(hr_img)
            # White separator between neighboring samples
            row.append(np.ones((hr_img.shape[0], 100, 3)) * 255)
        # Split the (image, separator) sequence into two rows of 4 samples,
        # dropping the trailing separator of each row
        row1 = np.concatenate(row[:7], axis=1)
        row2 = np.concatenate(row[8:-1], axis=1)
        mid_padding = np.ones((100, row1.shape[1], 3)) * 255
        superimage = np.concatenate([row1, mid_padding, row2], axis=0)
        top_padding = np.ones((128, superimage.shape[1], 3)) * 255
        superimage = \
            np.concatenate([top_padding, superimage], axis=0)

        fullpath = '%s/sentence%d.jpg' % (save_dir, startID + j)
        superimage = drawCaption(np.uint8(superimage), captions_batch[j])
        scipy.misc.imsave(fullpath, superimage)
def save_super_images(sample_batches, hr_sample_batches, captions_batch,
                      batch_size, startID, save_dir):
    if not os.path.isdir(save_dir):
        print("Making a new folder: ", save_dir)
        mkdir_p(save_dir)

    # Save up to 16 samples for each text embedding
    img_shape = hr_sample_batches[0][0].shape
    for j in range(batch_size):
        if not re.search('[a-zA-Z]+', captions_batch[j]):
            continue

        padding = np.zeros(img_shape)
        row1 = [padding]
        row2 = [padding]
        # First row with up to 8 samples
        for i in range(np.minimum(8, len(sample_batches))):
            lr_img = sample_batches[i][j]
            hr_img = hr_sample_batches[i][j]
            hr_img = (hr_img + 1.0) * 127.5
            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
            row1.append(re_sample)
            row2.append(hr_img)
        row1 = np.concatenate(row1, axis=1)
        row2 = np.concatenate(row2, axis=1)
        superimage = np.concatenate([row1, row2], axis=0)

        # Second row with up to 8 samples
        if len(sample_batches) > 8:
            row1 = [padding]
            row2 = [padding]
            for i in range(8, len(sample_batches)):
                lr_img = sample_batches[i][j]
                hr_img = hr_sample_batches[i][j]
                hr_img = (hr_img + 1.0) * 127.5
                re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
                row1.append(re_sample)
                row2.append(hr_img)
            row1 = np.concatenate(row1, axis=1)
            row2 = np.concatenate(row2, axis=1)
            super_row = np.concatenate([row1, row2], axis=0)
            superimage2 = np.zeros_like(superimage)
            superimage2[:super_row.shape[0],
                        :super_row.shape[1],
                        :super_row.shape[2]] = super_row
            mid_padding = np.zeros((64, superimage.shape[1], 3))
            superimage = np.concatenate([superimage, mid_padding, superimage2],
                                        axis=0)

        top_padding = np.zeros((128, superimage.shape[1], 3))
        superimage = np.concatenate([top_padding, superimage], axis=0)

        fullpath = '%s/sentence%d.jpg' % (save_dir, startID + j)
        superimage = drawCaption(np.uint8(superimage), captions_batch[j])
        scipy.misc.imsave(fullpath, superimage)
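# Hedged usage sketch (not part of the original code): drives
# save_super_images above with dummy data. The shapes, the 16 copies per
# caption, and the save_dir name are illustrative assumptions; drawCaption
# and mkdir_p must be available as in the function above.
import numpy as np

n_copies, batch = 16, 2
lr_batches = [np.random.rand(batch, 64, 64, 3) for _ in range(n_copies)]
hr_batches = [np.random.rand(batch, 256, 256, 3) * 2.0 - 1.0  # in [-1, 1]
              for _ in range(n_copies)]
captions = ['a small red bird', 'a yellow flower']
save_super_images(lr_batches, hr_batches, captions, batch,
                  startID=0, save_dir='demo_super_images')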
def save_super_images(self, images, sample_batchs, filenames,
                      sentenceID, save_dir, subset):
    # batch_size samples for each embedding
    numSamples = len(sample_batchs)
    for j in range(len(filenames)):
        s_tmp_r = '%s-1real-%dsamples/%s/real-latents/%s' %\
            (save_dir, numSamples, subset, filenames[j])
        s_tmp_f = '%s-1real-%dsamples/%s/fake-latents/%s' %\
            (save_dir, numSamples, subset, filenames[j])
        folder_r = s_tmp_r[:s_tmp_r.rfind('/')]
        if not os.path.isdir(folder_r):
            print('Make a new folder: ', folder_r)
            mkdir_p(folder_r)
        folder_f = s_tmp_f[:s_tmp_f.rfind('/')]
        if not os.path.isdir(folder_f):
            print('Make a new folder: ', folder_f)
            mkdir_p(folder_f)

        # superimage_r = [images[j]]
        # superimage_f = [images[j]]
        # cfg.TRAIN.NUM_COPY samples for each text embedding/sentence
        for i in range(len(sample_batchs)):
            # superimage_r.append(sample_batchs[i][0][j])
            # superimage_f.append(sample_batchs[i][1][j])
            scipy.misc.imsave('%s_sentence%d_%d.jpg' % (s_tmp_r, sentenceID, i),
                              sample_batchs[i][0][j])
            scipy.misc.imsave('%s_sentence%d_%d.jpg' % (s_tmp_f, sentenceID, i),
                              sample_batchs[i][1][j])
def __init__(self, output_dir, data_loader, dataloader_val):
    if cfg.TRAIN:
        self.model_dir = os.path.join(output_dir, 'Model')
        self.image_dir = os.path.join(output_dir, 'Image')
        mkdir_p(self.model_dir)
        mkdir_p(self.image_dir)

    torch.cuda.set_device(cfg.GPU_ID)
    cudnn.benchmark = True

    self.batch_size = data_loader.batch_size
    self.val_batch_size = dataloader_val.batch_size
    self.max_epoch = cfg.epochs
    self.snapshot_interval = cfg.snapshot_interval
    self.data_loader = data_loader
    self.dataloader_val = dataloader_val
    self.num_batches = len(self.data_loader)

    self.bert_config = BertConfig(
        vocab_size=data_loader.dataset.vocab_size,
        hidden_size=512,
        num_hidden_layers=3,
        num_attention_heads=8,
        intermediate_size=2048,
        hidden_act='gelu',
        hidden_dropout_prob=cfg.hidden_dropout_prob,
        attention_probs_dropout_prob=cfg.attention_probs_dropout_prob,
        max_position_embeddings=512,
        layer_norm_eps=1e-12,
        initializer_range=0.02,
        type_vocab_size=2,
        pad_token_id=0)
def __init__(self, output_dir, data_loader, dataloader_val):
    if cfg.TRAIN:
        self.model_dir = os.path.join(output_dir, 'Model')
        self.image_dir = os.path.join(output_dir, 'Image')
        mkdir_p(self.model_dir)
        mkdir_p(self.image_dir)

    torch.cuda.set_device(cfg.GPU_ID)
    cudnn.benchmark = True

    # Per-class counts Q_c and total count Q; (Q - Q_c) / Q down-weights
    # frequent classes and up-weights rare ones
    Q_c = np.array([46, 53, 86, 103, 137, 265, 290, 2422])
    Q = 3166.0
    self.class_weight_vector = torch.tensor((Q - Q_c) / Q)

    self.batch_size = data_loader.batch_size
    self.val_batch_size = dataloader_val.batch_size
    self.max_epoch = cfg.epochs
    self.snapshot_interval = cfg.snapshot_interval

    pos_weights = torch.tensor(
        [3.255, 3.255, 3.255, 3.255, 3.255, 3.255, 3.255, 1.0])
    self.criterion = nn.BCEWithLogitsLoss(reduction='none',
                                          pos_weight=pos_weights)

    self.data_loader = data_loader
    self.dataloader_val = dataloader_val
    self.num_batches = len(self.data_loader)

    if cfg.CUDA:
        pos_weights = pos_weights.cuda()
        self.class_weight_vector = self.class_weight_vector.cuda()
        self.criterion = self.criterion.cuda()
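# Hedged illustration (not from the original repo): how the
# reduction='none' criterion and class_weight_vector defined above combine
# into a class-weighted BCE loss, mirroring the training loop later in this
# section. The batch size of 4 and random inputs are assumptions.
import torch
import torch.nn as nn

Q_c = torch.tensor([46., 53., 86., 103., 137., 265., 290., 2422.])
Q = 3166.0
class_weight_vector = (Q - Q_c) / Q  # rarer classes get larger weights

pos_weights = torch.tensor([3.255] * 7 + [1.0])
criterion = nn.BCEWithLogitsLoss(reduction='none', pos_weight=pos_weights)

logits = torch.randn(4, 8)                     # model outputs
targets = torch.randint(0, 2, (4, 8)).float()  # multi-label ground truth
loss = criterion(logits, targets)              # per-element loss, shape (4, 8)
loss = (loss * class_weight_vector).mean()     # weight each class, then average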
def partial_fit(self, X, last=False):
    """Train model based on mini-batch of input data.

    Return cost of mini-batch.
    """
    opt, cost, cross_entropy, MI, summary = \
        self.sess.run((self.optimizer, self.cost, self.cross_entropy,
                       self.MI, self.merged),
                      feed_dict={self.x: X})
    self.train_summary_writer.add_summary(summary, self.step)
    if last:
        now = datetime.datetime.now(dateutil.tz.tzlocal())
        timestamp = now.strftime('%H_%M_%S_%Y%m%d')
        n_z = self.network_architecture['n_z']
        n_c = self.network_architecture['n_c']
        savefolder = '{}/DS-{}_nz{}_nc{}_info{}_{}'.format(
            SAVE_MODEL_TO, self.dataset_name, n_z, n_c, self.info, timestamp)
        mkdir_p(savefolder)
        self.saver.save(self.sess, '{}/model'.format(savefolder))
    self.step += 1
    return cost
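# Hedged usage sketch: driving partial_fit from an epoch loop. 'vae' and
# 'X_train' are hypothetical names, not part of the original code;
# last=True on the final call triggers the model save shown above.
n_epochs, batch_size = 10, 128
n_batches = len(X_train) // batch_size
for epoch in range(n_epochs):
    for i in range(n_batches):
        X_batch = X_train[i * batch_size:(i + 1) * batch_size]
        is_last = (epoch == n_epochs - 1) and (i == n_batches - 1)
        cost = vae.partial_fit(X_batch, last=is_last)
    print('Epoch %d, last mini-batch cost: %.4f' % (epoch, cost))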
def save_super_images(sample_batchs, hr_sample_batchs, captions_batch,
                      batch_size, startID, save_dir):
    if not os.path.isdir(save_dir):
        print('Make a new folder: ', save_dir)
        mkdir_p(save_dir)

    # Save up to 16 samples for each text embedding/sentence
    img_shape = hr_sample_batchs[0][0].shape
    for j in range(batch_size):
        if not re.search('[a-zA-Z]+', captions_batch[j]):
            continue

        padding = np.zeros(img_shape)
        row1 = [padding]
        row2 = [padding]
        # First row with up to 8 samples
        for i in range(np.minimum(8, len(sample_batchs))):
            lr_img = sample_batchs[i][j]
            hr_img = hr_sample_batchs[i][j]
            hr_img = (hr_img + 1.0) * 127.5
            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
            row1.append(re_sample)
            row2.append(hr_img)
        row1 = np.concatenate(row1, axis=1)
        row2 = np.concatenate(row2, axis=1)
        superimage = np.concatenate([row1, row2], axis=0)

        # Second row with up to 8 samples
        if len(sample_batchs) > 8:
            row1 = [padding]
            row2 = [padding]
            for i in range(8, len(sample_batchs)):
                lr_img = sample_batchs[i][j]
                hr_img = hr_sample_batchs[i][j]
                hr_img = (hr_img + 1.0) * 127.5
                re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
                row1.append(re_sample)
                row2.append(hr_img)
            row1 = np.concatenate(row1, axis=1)
            row2 = np.concatenate(row2, axis=1)
            super_row = np.concatenate([row1, row2], axis=0)
            superimage2 = np.zeros_like(superimage)
            superimage2[:super_row.shape[0],
                        :super_row.shape[1],
                        :super_row.shape[2]] = super_row
            mid_padding = np.zeros((64, superimage.shape[1], 3))
            superimage = \
                np.concatenate([superimage, mid_padding, superimage2], axis=0)

        top_padding = np.zeros((128, superimage.shape[1], 3))
        superimage = \
            np.concatenate([top_padding, superimage], axis=0)

        fullpath = '%s/sentence%d.jpg' % (save_dir, startID + j)
        superimage = drawCaption(np.uint8(superimage), captions_batch[j])
        scipy.misc.imsave(fullpath, superimage)
def sample(self, datapath, stage=1):
    if stage == 1:
        netG, _ = self.load_network_stageI()
    else:
        netG, _ = self.load_network_stageII()
    netG.eval()

    # Load text embeddings generated from the encoder
    t_file = torchfile.load(datapath)
    captions_list = t_file.raw_txt
    embeddings = np.concatenate(t_file.fea_txt, axis=0)
    num_embeddings = len(captions_list)
    print('Successfully loaded sentences from: ', datapath)
    print('Total number of sentences:', num_embeddings)
    print('num_embeddings:', num_embeddings, embeddings.shape)

    # Path to save generated samples
    save_dir = cfg.NET_G[:cfg.NET_G.find('.pth')]
    mkdir_p(save_dir)

    batch_size = np.minimum(num_embeddings, self.batch_size)
    nz = cfg.Z_DIM
    noise = Variable(torch.FloatTensor(batch_size, nz))
    if cfg.CUDA:
        noise = noise.cuda()
    count = 0
    while count < num_embeddings:
        if count > 3000:
            break
        iend = count + batch_size
        if iend > num_embeddings:
            iend = num_embeddings
            count = num_embeddings - batch_size
        embeddings_batch = embeddings[count:iend]
        # captions_batch = captions_list[count:iend]
        txt_embedding = Variable(torch.FloatTensor(embeddings_batch))
        if cfg.CUDA:
            txt_embedding = txt_embedding.cuda()

        #######################################################
        # (2) Generate fake images
        #######################################################
        noise.data.normal_(0, 1)
        inputs = (txt_embedding, noise)
        _, fake_imgs, mu, logvar = \
            nn.parallel.data_parallel(netG, inputs, self.gpus)
        for i in range(batch_size):
            save_name = '%s/%d.png' % (save_dir, count + i)
            im = fake_imgs[i].data.cpu().numpy()
            im = (im + 1.0) * 127.5  # map [-1, 1] to [0, 255]
            im = im.astype(np.uint8)
            # print('im', im.shape)
            im = np.transpose(im, (1, 2, 0))  # CHW -> HWC
            # print('im', im.shape)
            im = Image.fromarray(im)
            im.save(save_name)
        count += batch_size
def save_super_images(self, images, sample_batchs, hr_sample_batchs,
                      savenames, captions_batchs, sentenceID, save_dir,
                      subset):
    # batch_size samples for each embedding
    # Up to 16 samples for each text embedding/sentence
    numSamples = len(sample_batchs)
    for j in range(len(savenames)):
        s_tmp = '%s-1real-%dsamples/%s/%s' % (save_dir, numSamples,
                                              subset, savenames[j])
        folder = s_tmp[:s_tmp.rfind('/')]
        if not os.path.isdir(folder):
            mkdir_p(folder)

        # First row with up to 8 samples
        real_img = (images[j] + 1.0) * 127.5
        img_shape = real_img.shape
        padding0 = np.zeros(img_shape)
        padding = np.zeros((img_shape[0], 20, 3))
        row1 = [padding0, real_img, padding]
        row2 = [padding0, real_img, padding]
        for i in range(np.minimum(8, numSamples)):
            lr_img = sample_batchs[i][j]
            hr_img = hr_sample_batchs[i][j]
            hr_img = (hr_img + 1.0) * 127.5
            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
            row1.append(re_sample)
            row2.append(hr_img)
        row1 = np.concatenate(row1, axis=1)
        row2 = np.concatenate(row2, axis=1)
        superimage = np.concatenate([row1, row2], axis=0)

        # Second row with up to 8 samples
        if len(sample_batchs) > 8:
            row1 = [padding0, real_img, padding]
            row2 = [padding0, real_img, padding]
            for i in range(8, len(sample_batchs)):
                lr_img = sample_batchs[i][j]
                hr_img = hr_sample_batchs[i][j]
                hr_img = (hr_img + 1.0) * 127.5
                re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
                row1.append(re_sample)
                row2.append(hr_img)
            row1 = np.concatenate(row1, axis=1)
            row2 = np.concatenate(row2, axis=1)
            super_row = np.concatenate([row1, row2], axis=0)
            superimage2 = np.zeros_like(superimage)
            superimage2[:super_row.shape[0],
                        :super_row.shape[1],
                        :super_row.shape[2]] = super_row
            mid_padding = np.zeros((64, superimage.shape[1], 3))
            superimage = np.concatenate(
                [superimage, mid_padding, superimage2], axis=0)

        top_padding = np.zeros((128, superimage.shape[1], 3))
        superimage = np.concatenate([top_padding, superimage], axis=0)

        captions = captions_batchs[j][sentenceID]
        fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID)
        superimage = self.drawCaption(np.uint8(superimage), captions)
        scipy.misc.imsave(fullpath, superimage)
def save_images(samples_batches, startID, save_dir):
    if not os.path.isdir(save_dir):
        print('Make a new folder: ', save_dir)
        mkdir_p(save_dir)
    k = 0
    for samples in samples_batches:
        for sample in samples:
            # Name each file by its running index; the index must be
            # converted to a string before joining, and an image extension
            # is needed so imsave can infer the output format
            full_path = os.path.join(save_dir, '%d.png' % (startID + k))
            sp.misc.imsave(full_path, sample)
            k += 1
    print("%i images saved in %s directory" % (k, save_dir))
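# Hedged usage sketch: saving two batches of random uint8 images with the
# helper above; the array shapes and save_dir are illustrative assumptions.
import numpy as np

batches = [np.random.randint(0, 256, size=(4, 64, 64, 3), dtype=np.uint8)
           for _ in range(2)]
save_images(batches, startID=0, save_dir='output/samples')  # "8 images saved ..."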
def save_super_images(self, images, sample_batchs, filenames,
                      sentenceID, save_dir, subset):
    # batch_size samples for each embedding
    numSamples = len(sample_batchs)
    for j in range(len(filenames)):
        s_tmp = '%s-1real-%dsamples/%s/%s' %\
            (save_dir, numSamples, subset, filenames[j])
        folder = s_tmp[:s_tmp.rfind('/')]
        if not os.path.isdir(folder):
            print('Make a new folder: ', folder)
            mkdir_p(folder)

        superimage = [images[j]]
        # cfg.TRAIN.NUM_COPY samples for each text embedding/sentence
        for i in range(len(sample_batchs)):
            superimage.append(sample_batchs[i][j])
        superimage = np.concatenate(superimage, axis=1)
        fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID)
        scipy.misc.imsave(fullpath, superimage)
def __init__(self,
             moments_path,  # if preprocessing is used, this should be removed
             ckt_logs_dir='ckt_logs',
             # NOTE: this default is evaluated once at import time, so every
             # instance created in the same process reuses the same timestamp
             exp_name='a2t_model_' + datetime.now().strftime('%Y%m%d-%H:%M:%S')):
    self.model = None
    self.model_path = ''
    self.optimizer = None
    self.lr_scheduler = None
    self.epoch = 0
    self.global_step = 0
    self.plot_func = plot_func()
    self.moments_path = moments_path
    self.N_GPU = len(cfg.TRAIN.GPU_ID)
    self.exp_name = exp_name

    if not cfg.TRAIN.FLAG:
        # test
        ckt_logs_dir = cfg.TEST.PRETRAINED_MODEL_DIR
        self.model_path = os.path.join(cfg.TEST.PRETRAINED_MODEL_DIR,
                                       cfg.TEST.MODEL_FP)
    elif cfg.TRAIN.RESTORE:
        # train from a restored model
        ckt_logs_dir = cfg.TRAIN.PRETRAINED_MODEL_DIR
        self.model_path = os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR,
                                       cfg.TRAIN.MODEL_FP)
    else:
        ckt_logs_dir = os.path.join(ckt_logs_dir, self.exp_name)
        mkdir_p(ckt_logs_dir)
    self.log_dir = ckt_logs_dir

    # Test results are currently recorded with tensorboard, so this folder
    # is not used:
    # self.test_log_dir = os.path.join(self.log_dir, 'test/tvs_test')
    # mkdir_p(self.test_log_dir)

    self.LR_STARTER = cfg.A2TTRAIN.LR
    self.LR_DECAY_EPOCH = cfg.A2TTRAIN.LR_DECAY_EPOCH
    self.LR_DECAY_RATE = cfg.A2TTRAIN.LR_DECAY_RATE

    self.writer = tensorboard.SummaryWriter(self.log_dir)
    self.__build_model()
def __init__(self, output_dir):
    if cfg.TRAIN.FLAG:
        self.model_dir = os.path.join(output_dir, 'Model')
        self.image_dir = os.path.join(output_dir, 'Image')
        self.log_dir = os.path.join(output_dir, 'Log')
        mkdir_p(self.model_dir)
        mkdir_p(self.image_dir)
        mkdir_p(self.log_dir)
        self.summary_writer = FileWriter(self.log_dir)

    self.max_epoch = cfg.TRAIN.MAX_EPOCH
    self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL

    s_gpus = cfg.GPU_ID.split(',')
    self.gpus = [int(ix) for ix in s_gpus]
    self.num_gpus = len(self.gpus)
    # Scale the effective batch size by the number of GPUs
    self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
    torch.cuda.set_device(self.gpus[0])
    cudnn.benchmark = True
def train(self):
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    # LAMBDA_FT, LAMBDA_FI, LAMBDA_DAMSM = 01, 50, 10
    tb_dir = '../tensorboard/{0}_{1}_{2}'.format(cfg.DATASET_NAME,
                                                 cfg.CONFIG_NAME, timestamp)
    mkdir_p(tb_dir)
    tbw = SummaryWriter(log_dir=tb_dir)  # Tensorboard logging

    ####### init models ########
    text_encoder, image_encoder, start_epoch = self.build_models()
    labels = Variable(torch.LongTensor(range(self.batch_size)))  # used for matching loss
    text_encoder.train()
    image_encoder.train()

    ###### init optimizers #####
    optimizerI, optimizerT, lr_schedulerI, lr_schedulerT = \
        self.define_optimizers(image_encoder, text_encoder)

    ##### init data #####
    match_labels = self.prepare_labels()
    batch_size = self.batch_size

    ###### init caption model criterion ######
    if cfg.CUDA:
        labels = labels.cuda()

    tensorboard_step = 0
    gen_iterations = 0
    # gen_iterations = start_epoch * self.num_batches

    #### print lambdas ###
    # print('LAMBDA_GEN:{0},LAMBDA_CAP:{1},LAMBDA_FT:{2},LAMBDA_FI:{3},LAMBDA_DAMSM:{4}'.format(
    #     cfg.TRAIN.SMOOTH.LAMBDA_GEN, cfg.TRAIN.SMOOTH.LAMBDA_CAP,
    #     cfg.TRAIN.SMOOTH.LAMBDA_FT, cfg.TRAIN.SMOOTH.LAMBDA_FI,
    #     cfg.TRAIN.SMOOTH.LAMBDA_DAMSM))

    for epoch in range(start_epoch, self.max_epoch):
        ##### set everything to trainable ####
        text_encoder.train()
        image_encoder.train()

        ####### init loss variables ############
        s_total_loss0 = 0
        s_total_loss1 = 0
        w_total_loss0 = 0
        w_total_loss1 = 0
        s_t_total_loss0 = 0
        s_t_total_loss1 = 0
        w_t_total_loss0 = 0
        w_t_total_loss1 = 0
        total_damsm_loss = 0
        total_t_loss = 0

        # Print the lr of each optimizer before training starts to make
        # sure the learning rates are correct
        print('Learning rates: lr_i %.7f, lr_t %.7f'
              % (optimizerI.param_groups[0]['lr'],
                 optimizerT.param_groups[0]['lr']))

        start_t = time.time()

        data_iter = iter(self.data_loader)
        # step = 0
        pbar = tqdm(range(self.num_batches))
        for step in pbar:  # while step < self.num_batches:
            ######################################################
            # (1) Prepare training data and compute text embeddings
            ######################################################
            imgs, captions, masks, class_ids, cap_lens = data_iter.next()
            class_ids = class_ids.numpy()
            ids = np.array(list(range(batch_size)))
            neg_ids = Variable(torch.LongTensor(
                [np.random.choice(ids[ids != x]) for x in ids]))  # used for matching loss
            if cfg.CUDA:
                imgs, captions, masks, cap_lens = (
                    imgs.cuda(), captions.cuda(), masks.cuda(), cap_lens.cuda())
                neg_ids = neg_ids.cuda()
            # add images, image masks, captions, caption masks for catr model

            ################## feedforward damsm model ##################
            image_encoder.zero_grad()  # image/text encoders zero_grad here
            text_encoder.zero_grad()
            words_features, sent_code = image_encoder(imgs)  # input images to image encoder, feedforward
            nef, att_sze = words_features.size(1), words_features.size(2)
            # hidden = text_encoder.init_hidden(batch_size)
            # words_embs: batch_size x nef x seq_len
            # sent_emb: batch_size x nef
            words_embs, sent_emb = text_encoder(captions, masks)

            #### damsm losses (disabled) ####
            # w_loss0, w_loss1, attn_maps = words_loss(words_features, words_embs[:, :, 1:], labels, cap_lens - 1, class_ids, batch_size)
            # w_total_loss0 += w_loss0.item()
            # w_total_loss1 += w_loss1.item()
            # damsm_loss = w_loss0 + w_loss1
            # s_loss0, s_loss1 = sent_loss(sent_code, sent_emb, labels, class_ids, batch_size)
            # s_total_loss0 += s_loss0.item()
            # s_total_loss1 += s_loss1.item()
            # damsm_loss += s_loss0 + s_loss1
            # total_damsm_loss += damsm_loss.item()

            #### triplet loss ####
            s_t_loss0, s_t_loss1 = sent_triplet_loss(sent_code, sent_emb,
                                                     labels, neg_ids,
                                                     batch_size)
            s_t_total_loss0 += s_t_loss0.item()
            s_t_total_loss1 += s_t_loss1.item()
            t_loss = s_t_loss0 + s_t_loss1
            # w_t_loss0, w_t_loss1, attn_maps = words_triplet_loss(words_features, words_embs[:, :, 1:], labels, neg_ids, cap_lens - 1, batch_size)
            # w_t_total_loss0 += w_t_loss0.item()
            # w_t_total_loss1 += w_t_loss1.item()
            # t_loss += w_t_loss0 + w_t_loss1
            total_t_loss += t_loss.item()

            # damsm_loss.backward()
            t_loss.backward()
            torch.nn.utils.clip_grad_norm_(image_encoder.parameters(),
                                           cfg.clip_max_norm)
            optimizerI.step()
            torch.nn.utils.clip_grad_norm_(text_encoder.parameters(),
                                           cfg.clip_max_norm)
            optimizerT.step()

            ##### loss values for each step #####
            ## damsm (disabled) ##
            # tbw.add_scalar('Train_step/train_w_step_loss0', float(w_loss0.item()), step + epoch * self.num_batches)
            # tbw.add_scalar('Train_step/train_s_step_loss0', float(s_loss0.item()), step + epoch * self.num_batches)
            # tbw.add_scalar('Train_step/train_w_step_loss1', float(w_loss1.item()), step + epoch * self.num_batches)
            # tbw.add_scalar('Train_step/train_s_step_loss1', float(s_loss1.item()), step + epoch * self.num_batches)
            # tbw.add_scalar('Train_step/train_damsm_step_loss', float(damsm_loss.item()), step + epoch * self.num_batches)
            ## triplet ##
            # tbw.add_scalar('Train_step/train_w_t_step_loss0', float(w_t_loss0.item()), step + epoch * self.num_batches)
            tbw.add_scalar('Train_step/train_s_t_step_loss0',
                           float(s_t_loss0.item()),
                           step + epoch * self.num_batches)
            # tbw.add_scalar('Train_step/train_w_t_step_loss1', float(w_t_loss1.item()), step + epoch * self.num_batches)
            tbw.add_scalar('Train_step/train_s_t_step_loss1',
                           float(s_t_loss1.item()),
                           step + epoch * self.num_batches)
            tbw.add_scalar('Train_step/train_t_step_loss',
                           float(t_loss.item()),
                           step + epoch * self.num_batches)

            ### tqdm description showing the running average loss ###
            # pbar.set_description('damsm %.5f' % (float(total_damsm_loss) / (step + 1)))
            pbar.set_description('triplet %.5f'
                                 % (float(total_t_loss) / (step + 1)))

        # v_s_cur_loss, v_w_cur_loss = self.evaluate(image_encoder, text_encoder, self.val_batch_size)
        # print('[epoch: %d] val_w_loss: %.4f, val_s_loss: %.4f' % (epoch, v_w_cur_loss, v_s_cur_loss))
        # ### val losses ###
        # tbw.add_scalar('Val_step/val_w_loss', float(v_w_cur_loss), epoch)
        # tbw.add_scalar('Val_step/val_s_loss', float(v_s_cur_loss), epoch)
        v_s_cur_loss, _ = self.evaluate(image_encoder, text_encoder,
                                        self.val_batch_size)
        print('[epoch: %d] val_s_loss: %.4f' % (epoch, v_s_cur_loss))
        ### val losses ###
        tbw.add_scalar('Val_step/val_s_loss', float(v_s_cur_loss), epoch)

        lr_schedulerI.step()
        lr_schedulerT.step()

        end_t = time.time()

        if epoch % cfg.snapshot_interval == 0:
            self.save_model(image_encoder, text_encoder, optimizerI,
                            optimizerT, lr_schedulerI, lr_schedulerT, epoch)

    # Final save after the last epoch
    self.save_model(image_encoder, text_encoder, optimizerI, optimizerT,
                    lr_schedulerI, lr_schedulerT, epoch)
def train(self): print("Running training for VAE on MNIST dataset") config = tf.ConfigProto(allow_soft_placement=True) with tf.Session(config=config) as sess: self.session = sess with tf.device("/gpu:%d" % cfg.GPU_ID): counter = self.build_model(sess) saver = tf.train.Saver(tf.all_variables(), keep_checkpoint_every_n_hours=2) num_examples = self.dataset.train.num_examples updates_per_epoch = num_examples // self.batch_size epoch_start = counter // updates_per_epoch for epoch in range(epoch_start, 150 + 1): widgets = [ "epoch #%d|" % epoch, Percentage(), Bar(), ETA() ] pbar = ProgressBar(maxval=updates_per_epoch, widgets=widgets) pbar.start() vae_loss = 0 for iter in range(updates_per_epoch): input_image, input_y = self.dataset.train.next_batch( self.batch_size) input_image = np.array(input_image) # input_image = input_image.reshape((self.batch_size, 28, 28, 1)) feed_dict = {self.input_images: input_image} feed_out = [ self.vae_trainer, self.reconstructed_image, self.reconstruction_loss, self.kl_div, self.vae_loss, self.latent_space ] _, rec_img, rec_loss, kl_loss, curr_vae_loss, curr_latent_space = sess.run( feed_out, feed_dict) vae_loss += curr_vae_loss if iter % 500 == 0: # print("Printing type of current latent space: " + str(type(curr_latent_space))) eps = np.random.normal(loc=0, scale=1, size=(64, 100)) # curr_latent_space = curr_latent_space + eps curr_feed_out = [self.reconstructed_image] gen_img = sess.run( curr_feed_out, feed_dict={self.latent_space: eps})[0] gen_img = utils.reshape_and_tile_images(gen_img * 255) rec_img = utils.reshape_and_tile_images(rec_img * 255) orig_img = utils.reshape_and_tile_images( input_image * 255) gen_img_filename = self.save_dir + "/epoch_%d/%d_gen_img.jpg" % ( epoch, iter) rec_img_filename = self.save_dir + "/epoch_%d/%d_rec_img.jpg" % ( epoch, iter) orig_img_filename = self.save_dir + "/epoch_%d/%d_orig_img.jpg" % ( epoch, iter) utils.mkdir_p(self.save_dir + "/epoch_%d" % (epoch)) cv2.imwrite(rec_img_filename, rec_img) cv2.imwrite(orig_img_filename, orig_img) cv2.imwrite(gen_img_filename, gen_img) counter += 1 if counter % self.snapshot_interval == 0: snapshot_path = '%s/%s_%s.ckpt' %\ (self.log_dir, self.exp_name, str(counter)) utils.mkdir_p(snapshot_path) fn = saver.save(sess, snapshot_path) print("Model saved in file: %s" % fn) vae_loss = vae_loss // updates_per_epoch log_line = "%s: %s, %s: %s, %s: %s" % ( "vae loss", vae_loss, "reconstruction loss", rec_loss, "kl loss", kl_loss) print("Epoch %d | " % (epoch) + log_line) sys.stdout.flush()
now = datetime.datetime.now(dateutil.tz.tzlocal())
timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')

root_log_dir = "output/logs/mnist"
root_checkpoint_dir = "output/ckt/mnist"
batch_size = 128
# updates_per_epoch = 100
max_epoch = 50

exp_name = "mnist_%s" % timestamp

log_dir = os.path.join(root_log_dir, exp_name)
checkpoint_dir = os.path.join(root_checkpoint_dir, exp_name)

mkdir_p(log_dir)
mkdir_p(checkpoint_dir)

dataset = MnistDataset(batch_size)

# InfoGAN-style latent specification: 62 unregularized noise dimensions plus
# one categorical and two continuous codes; the boolean flag marks which
# codes are regularized via the mutual-information term
latent_spec = [
    (Uniform(62), False),
    (Categorical(10), True),
    (Uniform(1, fix_std=True), True),
    (Uniform(1, fix_std=True), True),
]

model = RegularizedGAN(output_dist=MeanBernoulli(dataset.image_dim),
                       latent_spec=latent_spec,
                       batch_size=batch_size,
                       image_shape=dataset.image_shape,
def train(self):
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    # LAMBDA_FT, LAMBDA_FI, LAMBDA_DAMSM = 01, 50, 10
    tb_dir = '../tensorboard/{0}_{1}_{2}'.format(cfg.DATASET_NAME,
                                                 cfg.CONFIG_NAME, timestamp)
    mkdir_p(tb_dir)
    tbw = SummaryWriter(log_dir=tb_dir)  # Tensorboard logging

    ####### init models ########
    image_encoder, start_epoch = self.build_models()
    labels = Variable(torch.LongTensor(range(self.batch_size)))  # used for matching loss
    image_encoder.train()

    ###### init optimizers #####
    optimizerI, lr_schedulerI = self.define_optimizers(image_encoder)

    ##### init data #####
    match_labels = self.prepare_labels()
    batch_size = self.batch_size

    ###### init caption model criterion ######
    if cfg.CUDA:
        labels = labels.cuda()

    tensorboard_step = 0
    gen_iterations = 0
    # gen_iterations = start_epoch * self.num_batches

    #### print lambdas ###
    # print('LAMBDA_GEN:{0},LAMBDA_CAP:{1},LAMBDA_FT:{2},LAMBDA_FI:{3},LAMBDA_DAMSM:{4}'.format(
    #     cfg.TRAIN.SMOOTH.LAMBDA_GEN, cfg.TRAIN.SMOOTH.LAMBDA_CAP,
    #     cfg.TRAIN.SMOOTH.LAMBDA_FT, cfg.TRAIN.SMOOTH.LAMBDA_FI,
    #     cfg.TRAIN.SMOOTH.LAMBDA_DAMSM))

    best_val_loss = 100000.0
    for epoch in range(start_epoch, self.max_epoch):
        ##### set everything to trainable ####
        image_encoder.train()
        total_bce_loss_epoch = 0.0

        # Print the lr of each optimizer before training starts to make
        # sure the learning rates are correct
        print('Learning rates: lr_i %.7f'
              % (optimizerI.param_groups[0]['lr']))

        start_t = time.time()

        data_iter = iter(self.data_loader)
        # step = 0
        pbar = tqdm(range(self.num_batches))
        for step in pbar:
            imgs, classes = data_iter.next()
            if cfg.CUDA:
                imgs, classes = imgs.cuda(), classes.cuda()
            # add images, image masks, captions, caption masks for catr model

            ###### feedforward classification model ######
            image_encoder.zero_grad()
            y_pred = image_encoder(imgs)  # input images to image encoder, feedforward
            bce_loss = self.criterion(y_pred, classes)
            bce_loss = bce_loss * self.class_weight_vector
            bce_loss = bce_loss.mean()
            total_bce_loss_epoch += bce_loss.item()
            bce_loss.backward()
            torch.nn.utils.clip_grad_norm_(image_encoder.parameters(),
                                           cfg.clip_max_norm)
            optimizerI.step()

            ##### loss values for each step #####
            tbw.add_scalar('Train_step/loss',
                           float(total_bce_loss_epoch / (step + 1)),
                           step + epoch * self.num_batches)

            ### tqdm description showing the running average loss ###
            # pbar.set_description('damsm %.5f' % (float(total_damsm_loss) / (step+1)))
            pbar.set_description('loss %.5f'
                                 % (float(total_bce_loss_epoch) / (step + 1)))

        v_loss, auc_scores = self.evaluate(image_encoder,
                                           self.val_batch_size)
        print('[epoch: %d] val_loss: %.4f' % (epoch, v_loss))
        print('-' * 80)
        ### val losses ###
        tbw.add_scalar('Val_step/loss', v_loss, epoch)
        for idx in range(len(auc_scores)):
            tbw.add_scalar('Val_step/{0}'.format(
                self.data_loader.dataset.idx_to_class[idx]),
                auc_scores[idx], epoch)

        lr_schedulerI.step()

        end_t = time.time()

        # Keep only the checkpoint with the best validation loss
        if v_loss < best_val_loss:
            best_val_loss = v_loss
            self.save_model(image_encoder, optimizerI, lr_schedulerI, epoch)
pprint.pprint(cfg)

now = datetime.datetime.now(dateutil.tz.tzlocal())
timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')

datadir = 'Data/%s' % cfg.DATASET_NAME
dataset = TextDataset(datadir, cfg.EMBEDDING_TYPE, 1)
filename_test = '%s/test' % (datadir)
dataset.test = dataset.get_data(filename_test)
if cfg.TRAIN.FLAG:
    filename_train = '%s/train' % (datadir)
    dataset.train = dataset.get_data(filename_train)

    ckt_logs_dir = "ckt_logs/%s/%s_%s" % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
    mkdir_p(ckt_logs_dir)
else:
    s_tmp = cfg.TRAIN.PRETRAINED_MODEL
    ckt_logs_dir = s_tmp[:s_tmp.find('.ckpt')]

model = CondGAN(image_shape=dataset.image_shape)

algo = CondGANTrainer(model=model, dataset=dataset, ckt_logs_dir=ckt_logs_dir)

if cfg.TRAIN.FLAG:
    algo.train()
print('Using config:')
pprint.pprint(cfg)

now = datetime.datetime.now(dateutil.tz.tzlocal())
timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')

datadir = 'Data/%s' % cfg.DATASET_NAME
dataset = TextDataset(datadir, cfg.EMBEDDING_TYPE, 1)
filename_test = '%s/test' % (datadir)
dataset.test = dataset.get_data(filename_test)
if cfg.TRAIN.FLAG:
    filename_train = '%s/train' % (datadir)
    dataset.train = dataset.get_data(filename_train)
    ckt_logs_dir = "ckt_logs/%s/%s_%s" % (cfg.DATASET_NAME,
                                          cfg.CONFIG_NAME, timestamp)
    mkdir_p(ckt_logs_dir)
else:
    s_tmp = cfg.TRAIN.PRETRAINED_MODEL
    ckt_logs_dir = s_tmp[:s_tmp.find('.ckpt')]

model = CondGAN(image_shape=dataset.image_shape)

algo = CondGANTrainer(model=model, dataset=dataset, ckt_logs_dir=ckt_logs_dir)
if cfg.TRAIN.FLAG:
    algo.train()
else:
    '''For every input text embedding/sentence in the training and test
    datasets, generate cfg.TRAIN.NUM_COPY images with randomness from
    noise z and conditioning augmentation.'''
    algo.evaluate()
pprint.pprint(cfg)
# now = datetime.datetime.now(dateutil.tz.tzlocal())
# timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')

datadir = args.dataset_dir
dataset = TextDataset(datadir, cfg.EMBEDDING_TYPE, 1)
filename_test = '%s/test' % (datadir)
dataset.test = dataset.get_data(filename_test)
if cfg.TRAIN.FLAG:
    filename_train = '%s/train' % (datadir)
    dataset.train = dataset.get_data(filename_train)

    ckt_logs_dir = "ckt_logs/%s/%s" % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME)
    mkdir_p(ckt_logs_dir)

    models_dir = "models/%s/%s" % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME)
    mkdir_p(models_dir)
else:
    s_tmp = cfg.TRAIN.PRETRAINED_MODEL
    ckt_logs_dir = s_tmp[:s_tmp.find('.ckpt')]
    # models_dir is also passed to CondGANTrainer below; reuse the same
    # layout here so the call does not raise a NameError in test mode
    models_dir = "models/%s/%s" % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME)

model = CondGAN(image_shape=dataset.image_shape)

algo = CondGANTrainer(model=model, dataset=dataset,
                      ckt_logs_dir=ckt_logs_dir, models_dir=models_dir)
if cfg.TRAIN.FLAG:
    algo.train()
def save_super_images(self, images, sample_batchs, hr_sample_batchs,
                      savenames, captions_batchs, sentenceID, save_dir,
                      subset):
    # batch_size samples for each embedding
    # Up to 16 samples for each text embedding/sentence
    numSamples = len(sample_batchs)
    for j in range(len(savenames)):
        s_tmp = '%s-1real-%dsamples/%s/%s' %\
            (save_dir, numSamples, subset, savenames[j])
        folder = s_tmp[:s_tmp.rfind('/')]
        if not os.path.isdir(folder):
            print('Make a new folder: ', folder)
            mkdir_p(folder)

        # First row with up to 8 samples
        real_img = (images[j] + 1.0) * 127.5
        img_shape = real_img.shape
        padding0 = np.zeros(img_shape)
        padding = np.zeros((img_shape[0], 20, 3))
        row1 = [padding0, real_img, padding]
        row2 = [padding0, real_img, padding]
        for i in range(np.minimum(8, numSamples)):
            lr_img = sample_batchs[i][j]
            hr_img = hr_sample_batchs[i][j]
            hr_img = (hr_img + 1.0) * 127.5
            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
            row1.append(re_sample)
            row2.append(hr_img)
        row1 = np.concatenate(row1, axis=1)
        row2 = np.concatenate(row2, axis=1)
        superimage = np.concatenate([row1, row2], axis=0)

        # Second row with up to 8 samples
        if len(sample_batchs) > 8:
            row1 = [padding0, real_img, padding]
            row2 = [padding0, real_img, padding]
            for i in range(8, len(sample_batchs)):
                lr_img = sample_batchs[i][j]
                hr_img = hr_sample_batchs[i][j]
                hr_img = (hr_img + 1.0) * 127.5
                re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
                row1.append(re_sample)
                row2.append(hr_img)
            row1 = np.concatenate(row1, axis=1)
            row2 = np.concatenate(row2, axis=1)
            super_row = np.concatenate([row1, row2], axis=0)
            superimage2 = np.zeros_like(superimage)
            superimage2[:super_row.shape[0],
                        :super_row.shape[1],
                        :super_row.shape[2]] = super_row
            mid_padding = np.zeros((64, superimage.shape[1], 3))
            superimage = np.concatenate([superimage, mid_padding,
                                         superimage2], axis=0)

        top_padding = np.zeros((128, superimage.shape[1], 3))
        superimage = \
            np.concatenate([top_padding, superimage], axis=0)

        captions = captions_batchs[j][sentenceID]
        fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID)
        superimage = self.drawCaption(np.uint8(superimage), captions)
        scipy.misc.imsave(fullpath, superimage)
        d.text((256, 10), caption, font=fnt, fill=(255, 255, 255, 255))
    else:
        cap1 = caption[:idx]
        cap2 = caption[idx + 1:]
        d.text((256, 10), cap1, font=fnt, fill=(255, 255, 255, 255))
        d.text((256, 60), cap2, font=fnt, fill=(255, 255, 255, 255))
    return img_txt


def save_super_images(sample_batchs, hr_sample_batchs, captions_batch,
                      batch_size, startID, save_dir):
    if not os.path.isdir(save_dir):
        print('Make a new folder: ', save_dir)
        mkdir_p(save_dir)

    # Save up to 16 samples for each text embedding/sentence
    img_shape = hr_sample_batchs[0][0].shape
    for j in range(batch_size):
        padding = np.zeros(img_shape)
        row1 = [padding]
        row2 = [padding]
        # First row with up to 8 samples
        for i in range(np.minimum(8, len(sample_batchs))):
            lr_img = sample_batchs[i][j]
            hr_img = hr_sample_batchs[i][j]
            hr_img = (hr_img + 1.0) * 127.5
            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
            row1.append(re_sample)
            row2.append(hr_img)