Example #1
def save_super_images(sample_batchs, hr_sample_batchs, captions_batch,
                      batch_size, startID, save_dir):
    if not os.path.isdir(save_dir):
        print('Make a new folder: ', save_dir)
        mkdir_p(save_dir)

    for j in range(batch_size):
        # Skip captions with no alphabetic characters.
        if not re.search('[a-zA-Z]+', captions_batch[j]):
            continue

        # Interleave each HR sample with a 100-px-wide white spacer.
        row = []
        for i in range(np.minimum(8, len(sample_batchs))):
            hr_img = hr_sample_batchs[i][j]
            hr_img = (hr_img + 1.0) * 127.5  # rescale from [-1, 1] to [0, 255]
            row.append(hr_img)
            row.append(np.ones((hr_img.shape[0], 100, 3)) * 255)
        # Split the eight samples into two rows of four, dropping each row's
        # trailing spacer (this slicing assumes exactly 8 samples).
        row1 = np.concatenate(row[:7], axis=1)
        row2 = np.concatenate(row[8:-1], axis=1)
        mid_padding = np.ones((100, row1.shape[1], 3)) * 255
        superimage = np.concatenate([row1, mid_padding, row2], axis=0)

        top_padding = np.ones((128, superimage.shape[1], 3)) * 255
        superimage =\
            np.concatenate([top_padding, superimage], axis=0)

        fullpath = '%s/sentence%d.jpg' % (save_dir, startID + j)
        superimage = drawCaption(np.uint8(superimage), captions_batch[j])
        scipy.misc.imsave(fullpath, superimage)
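
Every example on this page calls a mkdir_p helper before writing files, but the helper itself is never shown. A minimal sketch, assuming it mirrors the shell's mkdir -p (create intermediate directories and tolerate one that already exists):

import errno
import os


def mkdir_p(path):
    # Create path recursively; an already-existing directory is not an error.
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise

On Python 3.2+, os.makedirs(path, exist_ok=True) achieves the same thing in one call.
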
Example #2
def save_super_images(sample_batches, hr_sample_batches,
                      captions_batch, batch_size,
                      startID, save_dir):
    if not os.path.isdir(save_dir):
        print("Making a new folder: ", save_dir)
        mkdir_p(save_dir)

    # Save up to 16 samples for each text embedding
    img_shape = hr_sample_batches[0][0].shape
    for j in range(batch_size):
        if not re.search('[a-zA-Z]+', captions_batch[j]):
            continue
        
        padding = np.zeros(img_shape)
        row1 = [padding]
        row2 = [padding]

        # First row with up to 8 samples
        for i in range(np.minimum(8, len(sample_batches))):
            lr_img = sample_batches[i][j]
            hr_img = hr_sample_batches[i][j]
            hr_img = (hr_img + 1.0) * 127.5
            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
            row1.append(re_sample)
            row2.append(hr_img)

        row1 = np.concatenate(row1, axis=1)
        row2 = np.concatenate(row2, axis=1)
        superimage = np.concatenate([row1, row2], axis=0)

        # Second row with up to 8 samples
        if len(sample_batches) > 8:
            row1 = [padding]
            row2 = [padding]

            for i in range(8, len(sample_batches)):
                lr_img = sample_batches[i][j]
                hr_img = hr_sample_batches[i][j]
                hr_img = (hr_img + 1.0) * 127.5
                re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
                row1.append(re_sample)
                row2.append(hr_img)

            row1 = np.concatenate(row1, axis=1)
            row2 = np.concatenate(row2, axis=1)
            super_row = np.concatenate([row1, row2], axis=0)
            superimage2 = np.zeros_like(superimage)
            superimage2[:super_row.shape[0],
                        :super_row.shape[1],
                        :super_row.shape[2]] = super_row

            mid_padding = np.zeros((64, superimage.shape[1], 3))
            superimage = np.concatenate([superimage, mid_padding, superimage2], axis=0)

        top_padding = np.zeros((128, superimage.shape[1], 3))
        superimage = np.concatenate([top_padding, superimage], axis=0)

        fullpath = '%s/sentence%d.jpg' % (save_dir, startID + j)
        superimage = drawCaption(np.uint8(superimage), captions_batch[j])
        scipy.misc.imsave(fullpath, superimage)
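Example #3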
    def save_super_images(self, images, sample_batchs, filenames,
                          sentenceID, save_dir, subset):
        # batch_size samples for each embedding
        numSamples = len(sample_batchs)
        for j in range(len(filenames)):
            s_tmp_r = '%s-1real-%dsamples/%s/real-latents/%s' %\
                (save_dir, numSamples, subset, filenames[j])
            s_tmp_f = '%s-1real-%dsamples/%s/fake-latents/%s' %\
                (save_dir, numSamples, subset, filenames[j])
            folder_r = s_tmp_r[:s_tmp_r.rfind('/')]
            if not os.path.isdir(folder_r):
                print('Make a new folder: ', folder_r)
                mkdir_p(folder_r)
            folder_f = s_tmp_f[:s_tmp_f.rfind('/')]
            if not os.path.isdir(folder_f):
                print('Make a new folder: ', folder_f)
                mkdir_p(folder_f)
            # superimage_r = [images[j]]
            # superimage_f = [images[j]]
            # cfg.TRAIN.NUM_COPY samples for each text embedding/sentence
            for i in range(len(sample_batchs)):
                # superimage_r.append(sample_batchs[i][0][j])
                # superimage_f.append(sample_batchs[i][1][j])
                scipy.misc.imsave('%s_sentence%d_%d.jpg'
                                  % (s_tmp_r, sentenceID, i),
                                  sample_batchs[i][0][j])
                scipy.misc.imsave('%s_sentence%d_%d.jpg'
                                  % (s_tmp_f, sentenceID, i),
                                  sample_batchs[i][1][j])
Example #4
    def __init__(self, output_dir, data_loader, dataloader_val):
        if cfg.TRAIN:
            self.model_dir = os.path.join(output_dir, 'Model')
            self.image_dir = os.path.join(output_dir, 'Image')
            mkdir_p(self.model_dir)
            mkdir_p(self.image_dir)

        torch.cuda.set_device(cfg.GPU_ID)
        cudnn.benchmark = True

        self.batch_size = data_loader.batch_size
        self.val_batch_size = dataloader_val.batch_size
        self.max_epoch = cfg.epochs
        self.snapshot_interval = cfg.snapshot_interval

        self.data_loader = data_loader
        self.dataloader_val = dataloader_val
        self.num_batches = len(self.data_loader)
        self.bert_config = BertConfig(
            vocab_size=data_loader.dataset.vocab_size,
            hidden_size=512,
            num_hidden_layers=3,
            num_attention_heads=8,
            intermediate_size=2048,
            hidden_act='gelu',
            hidden_dropout_prob=cfg.hidden_dropout_prob,
            attention_probs_dropout_prob=cfg.attention_probs_dropout_prob,
            max_position_embeddings=512,
            layer_norm_eps=1e-12,
            initializer_range=0.02,
            type_vocab_size=2,
            pad_token_id=0)
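Example #5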
    def __init__(self, output_dir, data_loader, dataloader_val):
        if cfg.TRAIN:
            self.model_dir = os.path.join(output_dir, 'Model')
            self.image_dir = os.path.join(output_dir, 'Image')
            mkdir_p(self.model_dir)
            mkdir_p(self.image_dir)

        torch.cuda.set_device(cfg.GPU_ID)
        cudnn.benchmark = True
        Q_c = np.array([46, 53, 86, 103, 137, 265, 290, 2422])
        Q = 3166.0
        self.class_weight_vector = torch.tensor((Q - Q_c) / Q)
        self.batch_size = data_loader.batch_size
        self.val_batch_size = dataloader_val.batch_size
        self.max_epoch = cfg.epochs
        self.snapshot_interval = cfg.snapshot_interval
        pos_weights = torch.tensor(
            [3.255, 3.255, 3.255, 3.255, 3.255, 3.255, 3.255, 1.0])
        self.criterion = nn.BCEWithLogitsLoss(reduction='none',
                                              pos_weight=pos_weights)

        self.data_loader = data_loader
        self.dataloader_val = dataloader_val
        self.num_batches = len(self.data_loader)
        if cfg.CUDA:
            # .cuda() on the criterion moves its registered pos_weight
            # buffer as well, so pos_weights itself need not be moved.
            self.class_weight_vector = self.class_weight_vector.cuda()
            self.criterion = self.criterion.cuda()
Example #6
    def partial_fit(self, X, last=False):
        """Train model based on mini-batch of input data.

        Return cost of mini-batch.
        """

        opt, cost, cross_entropy, MI, summary = \
            self.sess.run((self.optimizer, self.cost,
                           self.cross_entropy,
                           self.MI,
                           self.merged),
                          feed_dict={self.x: X})

        self.train_summary_writer.add_summary(summary, self.step)
        if last:
            now = datetime.datetime.now(dateutil.tz.tzlocal())
            timestamp = now.strftime('%H_%M_%S_%Y%m%d')
            n_z = self.network_architecture['n_z']
            n_c = self.network_architecture['n_c']

            savefolder = '{}/DS-{}_nz{}_nc{}_info{}_{}'.format(SAVE_MODEL_TO,
                                                               self.dataset_name,
                                                               n_z, n_c,
                                                               self.info,
                                                               timestamp)
            mkdir_p(savefolder)
            self.saver.save(self.sess, '{}/model'.format(savefolder))

        self.step += 1

        return cost
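
A hypothetical driver loop for the partial_fit API above (model, batches, and num_epochs are assumed names, not part of the original): the last flag is raised only on the final mini-batch, so the checkpoint branch runs exactly once.

costs = []
for epoch in range(num_epochs):
    for i, X_batch in enumerate(batches):
        # Save the model only after the very last mini-batch.
        last = (epoch == num_epochs - 1) and (i == len(batches) - 1)
        costs.append(model.partial_fit(X_batch, last=last))
    print('epoch %d: mean cost %.4f' % (epoch, sum(costs) / len(costs)))
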
Example #7
def save_super_images(sample_batchs, hr_sample_batchs,
                      captions_batch, batch_size,
                      startID, save_dir):
    if not os.path.isdir(save_dir):
        print('Make a new folder: ', save_dir)
        mkdir_p(save_dir)

    # Save up to 16 samples for each text embedding/sentence
    img_shape = hr_sample_batchs[0][0].shape
    for j in range(batch_size):
        if not re.search('[a-zA-Z]+', captions_batch[j]):
            continue

        padding = np.zeros(img_shape)
        row1 = [padding]
        row2 = [padding]
        # First row with up to 8 samples
        for i in range(np.minimum(8, len(sample_batchs))):
            lr_img = sample_batchs[i][j]
            hr_img = hr_sample_batchs[i][j]
            hr_img = (hr_img + 1.0) * 127.5
            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
            row1.append(re_sample)
            row2.append(hr_img)
        row1 = np.concatenate(row1, axis=1)
        row2 = np.concatenate(row2, axis=1)
        superimage = np.concatenate([row1, row2], axis=0)

        # Second row with up to 8 samples
        if len(sample_batchs) > 8:
            row1 = [padding]
            row2 = [padding]
            for i in range(8, len(sample_batchs)):
                lr_img = sample_batchs[i][j]
                hr_img = hr_sample_batchs[i][j]
                hr_img = (hr_img + 1.0) * 127.5
                re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
                row1.append(re_sample)
                row2.append(hr_img)
            row1 = np.concatenate(row1, axis=1)
            row2 = np.concatenate(row2, axis=1)
            super_row = np.concatenate([row1, row2], axis=0)
            superimage2 = np.zeros_like(superimage)
            superimage2[:super_row.shape[0],
                        :super_row.shape[1],
                        :super_row.shape[2]] = super_row
            mid_padding = np.zeros((64, superimage.shape[1], 3))
            superimage =\
                np.concatenate([superimage, mid_padding, superimage2], axis=0)

        top_padding = np.zeros((128, superimage.shape[1], 3))
        superimage =\
            np.concatenate([top_padding, superimage], axis=0)

        fullpath = '%s/sentence%d.jpg' % (save_dir, startID + j)
        superimage = drawCaption(np.uint8(superimage), captions_batch[j])
        scipy.misc.imsave(fullpath, superimage)
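
Several of these examples depend on scipy.misc.imresize and scipy.misc.imsave, which were deprecated in SciPy 1.0 and removed in later releases. A rough drop-in sketch using Pillow and NumPy (an assumption, not part of the original code; scipy's imresize defaulted to bilinear resampling, which is mirrored here):

import numpy as np
from PIL import Image


def imresize(arr, size):
    # size is (height, width); PIL's resize expects (width, height).
    img = Image.fromarray(np.uint8(arr))
    return np.asarray(img.resize((size[1], size[0]), resample=Image.BILINEAR))


def imsave(path, arr):
    # Clip to the displayable range before converting to 8-bit.
    Image.fromarray(np.uint8(np.clip(arr, 0, 255))).save(path)
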
Example #8
    def sample(self, datapath, stage=1):
        if stage == 1:
            netG, _ = self.load_network_stageI()
        else:
            netG, _ = self.load_network_stageII()
        netG.eval()

        # Load text embeddings generated from the encoder
        t_file = torchfile.load(datapath)
        captions_list = t_file.raw_txt
        embeddings = np.concatenate(t_file.fea_txt, axis=0)
        num_embeddings = len(captions_list)
        print('Successfully loaded sentences from: ', datapath)
        print('Total number of sentences:', num_embeddings)
        print('num_embeddings:', num_embeddings, embeddings.shape)
        # path to save generated samples
        save_dir = cfg.NET_G[:cfg.NET_G.find('.pth')]
        mkdir_p(save_dir)

        batch_size = np.minimum(num_embeddings, self.batch_size)
        nz = cfg.Z_DIM
        noise = Variable(torch.FloatTensor(batch_size, nz))
        if cfg.CUDA:
            noise = noise.cuda()
        count = 0
        while count < num_embeddings:
            if count > 3000:
                break
            iend = count + batch_size
            if iend > num_embeddings:
                iend = num_embeddings
                count = num_embeddings - batch_size
            embeddings_batch = embeddings[count:iend]
            # captions_batch = captions_list[count:iend]
            txt_embedding = Variable(torch.FloatTensor(embeddings_batch))
            if cfg.CUDA:
                txt_embedding = txt_embedding.cuda()

            #######################################################
            # (2) Generate fake images
            ######################################################
            noise.data.normal_(0, 1)
            inputs = (txt_embedding, noise)
            _, fake_imgs, mu, logvar = \
                nn.parallel.data_parallel(netG, inputs, self.gpus)
            for i in range(batch_size):
                save_name = '%s/%d.png' % (save_dir, count + i)
                im = fake_imgs[i].data.cpu().numpy()
                im = (im + 1.0) * 127.5
                im = im.astype(np.uint8)
                # print('im', im.shape)
                im = np.transpose(im, (1, 2, 0))
                # print('im', im.shape)
                im = Image.fromarray(im)
                im.save(save_name)
            count += batch_size
Example #9
    def save_super_images(self, images, sample_batchs, hr_sample_batchs,
                          savenames, captions_batchs, sentenceID, save_dir,
                          subset):
        # batch_size samples for each embedding
        # Up to 16 samples for each text embedding/sentence
        numSamples = len(sample_batchs)
        for j in range(len(savenames)):
            s_tmp = '%s-1real-%dsamples/%s/%s' % (save_dir, numSamples,
                                                  subset, savenames[j])
            folder = s_tmp[:s_tmp.rfind('/')]
            if not os.path.isdir(folder):
                mkdir_p(folder)
            # First row with up to 8 samples
            real_img = (images[j] + 1.0) * 127.5
            img_shape = real_img.shape
            padding0 = np.zeros(img_shape)
            padding = np.zeros((img_shape[0], 20, 3))
            row1 = [padding0, real_img, padding]
            row2 = [padding0, real_img, padding]
            for i in range(np.minimum(8, numSamples)):
                lr_img = sample_batchs[i][j]
                hr_img = hr_sample_batchs[i][j]
                hr_img = (hr_img + 1.0) * 127.5
                re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
                row1.append(re_sample)
                row2.append(hr_img)
            row1 = np.concatenate(row1, axis=1)
            row2 = np.concatenate(row2, axis=1)
            superimage = np.concatenate([row1, row2], axis=0)
            # Second row with up to 8 samples
            if len(sample_batchs) > 8:
                row1 = [padding0, real_img, padding]
                row2 = [padding0, real_img, padding]
                for i in range(8, len(sample_batchs)):
                    lr_img = sample_batchs[i][j]
                    hr_img = hr_sample_batchs[i][j]
                    hr_img = (hr_img + 1.0) * 127.5
                    re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
                    row1.append(re_sample)
                    row2.append(hr_img)
                row1 = np.concatenate(row1, axis=1)
                row2 = np.concatenate(row2, axis=1)
                super_row = np.concatenate([row1, row2], axis=0)
                superimage2 = np.zeros_like(superimage)
                superimage2[:super_row.shape[0],
                            :super_row.shape[1],
                            :super_row.shape[2]] = super_row
                mid_padding = np.zeros((64, superimage.shape[1], 3))
                superimage = np.concatenate(
                    [superimage, mid_padding, superimage2], axis=0)
            top_padding = np.zeros((128, superimage.shape[1], 3))
            superimage = np.concatenate([top_padding, superimage], axis=0)
            captions = captions_batchs[j][sentenceID]
            fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID)
            superimage = self.drawCaption(np.uint8(superimage), captions)
            scipy.misc.imsave(fullpath, superimage)
Example #10
def save_images(samples_batches, startID, save_dir):
    if not os.path.isdir(save_dir):
        print('Make a new folder: ', save_dir)
        mkdir_p(save_dir)
    
    k = 0
    for samples in samples_batches:
        for sample in samples:
            # os.path.join needs string components, and the file needs an
            # extension (.jpg is an assumption; the original passed an int).
            full_path = os.path.join(save_dir, '%d.jpg' % (startID + k))
            sp.misc.imsave(full_path, sample)
            k += 1
    print("%i images saved in %s directory" % (k, save_dir))
    def save_super_images(self, images, sample_batchs, filenames, sentenceID,
                          save_dir, subset):
        # batch_size samples for each embedding
        numSamples = len(sample_batchs)
        for j in range(len(filenames)):
            s_tmp = '%s-1real-%dsamples/%s/%s' %\
                (save_dir, numSamples, subset, filenames[j])
            folder = s_tmp[:s_tmp.rfind('/')]
            if not os.path.isdir(folder):
                print('Make a new folder: ', folder)
                mkdir_p(folder)
            superimage = [images[j]]
            # cfg.TRAIN.NUM_COPY samples for each text embedding/sentence
            for i in range(len(sample_batchs)):
                superimage.append(sample_batchs[i][j])
            superimage = np.concatenate(superimage, axis=1)
            fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID)
            scipy.misc.imsave(fullpath, superimage)
Example #12
    def save_super_images(self, images, sample_batchs, filenames,
                          sentenceID, save_dir, subset):
        # batch_size samples for each embedding
        numSamples = len(sample_batchs)
        for j in range(len(filenames)):
            s_tmp = '%s-1real-%dsamples/%s/%s' %\
                (save_dir, numSamples, subset, filenames[j])
            folder = s_tmp[:s_tmp.rfind('/')]
            if not os.path.isdir(folder):
                print('Make a new folder: ', folder)
                mkdir_p(folder)
            superimage = [images[j]]
            # cfg.TRAIN.NUM_COPY samples for each text embedding/sentence
            for i in range(len(sample_batchs)):
                superimage.append(sample_batchs[i][j])

            superimage = np.concatenate(superimage, axis=1)
            fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID)
            scipy.misc.imsave(fullpath, superimage)
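Example #13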
    def __init__(self,
                 moments_path,  # can be removed if preprocessing is used
                 ckt_logs_dir='ckt_logs',
                 # NOTE: this default is evaluated once, at definition time,
                 # so instances created without an explicit exp_name all
                 # share the same timestamp.
                 exp_name='a2t_model_' + datetime.now().strftime('%Y%m%d-%H:%M:%S')):
        self.model = None
        self.model_path = ''
        self.optimizer = None
        self.lr_scheduler = None
        self.epoch = 0
        self.global_step = 0

        self.plot_func = plot_func()
        self.moments_path = moments_path
        self.N_GPU = len(cfg.TRAIN.GPU_ID)
        self.exp_name = exp_name

        if not cfg.TRAIN.FLAG: # test
            ckt_logs_dir = cfg.TEST.PRETRAINED_MODEL_DIR
            self.model_path = os.path.join(cfg.TEST.PRETRAINED_MODEL_DIR, cfg.TEST.MODEL_FP)
        elif cfg.TRAIN.RESTORE: # train from restored model
            ckt_logs_dir = cfg.TRAIN.PRETRAINED_MODEL_DIR
            self.model_path = os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, cfg.TRAIN.MODEL_FP)
        else:
            ckt_logs_dir = os.path.join(ckt_logs_dir, self.exp_name)
            mkdir_p(ckt_logs_dir)
        self.log_dir = ckt_logs_dir
        # # Currently use tensorboard to record test results, thus this folder not used
        # self.test_log_dir = os.path.join(self.log_dir, 'test/tvs_test')
        # mkdir_p(self.test_log_dir)
        
        self.LR_STARTER = cfg.A2TTRAIN.LR
        self.LR_DECAY_EPOCH = cfg.A2TTRAIN.LR_DECAY_EPOCH
        self.LR_DECAY_RATE = cfg.A2TTRAIN.LR_DECAY_RATE

        self.writer = tensorboard.SummaryWriter(self.log_dir)
        self.__build_model()
Example #14
    def __init__(self, output_dir):
        if cfg.TRAIN.FLAG:
            self.model_dir = os.path.join(output_dir, 'Model')
            self.image_dir = os.path.join(output_dir, 'Image')
            self.log_dir = os.path.join(output_dir, 'Log')
            mkdir_p(self.model_dir)
            mkdir_p(self.image_dir)
            mkdir_p(self.log_dir)
            self.summary_writer = FileWriter(self.log_dir)

        self.max_epoch = cfg.TRAIN.MAX_EPOCH
        self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL

        s_gpus = cfg.GPU_ID.split(',')
        self.gpus = [int(ix) for ix in s_gpus]
        self.num_gpus = len(self.gpus)
        self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
        torch.cuda.set_device(self.gpus[0])
        cudnn.benchmark = True
Example #15
    def train(self):
        
        now = datetime.datetime.now(dateutil.tz.tzlocal())
        timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
        #     LAMBDA_FT,LAMBDA_FI,LAMBDA_DAMSM=01,50,10
        tb_dir = '../tensorboard/{0}_{1}_{2}'.format(cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
        mkdir_p(tb_dir)
        tbw = SummaryWriter(log_dir=tb_dir) # Tensorboard logging

        
        ####### init models ########
        text_encoder, image_encoder, start_epoch, = self.build_models()
        labels = Variable(torch.LongTensor(range(self.batch_size))) # used for matching loss
        
        text_encoder.train()
        image_encoder.train()
    
        ###############################################################
        
        ###### init optimizers #####
        optimizerI, optimizerT, lr_schedulerI, lr_schedulerT = self.define_optimizers(image_encoder, text_encoder)
        ############################################
        
        ##### init data #############################
        
        match_labels = self.prepare_labels()

        batch_size = self.batch_size
        ##################################################################
        
        
        
        ###### init caption model criterion ############
        if cfg.CUDA:
            labels = labels.cuda()
        #################################################
        
        tensorboard_step = 0
        gen_iterations = 0
        # gen_iterations = start_epoch * self.num_batches
        
        #### print lambdas ###
#         print('LAMBDA_GEN:{0},LAMBDA_CAP:{1},LAMBDA_FT:{2},LAMBDA_FI:{3},LAMBDA_DAMSM:{4}'.format(cfg.TRAIN.SMOOTH.LAMBDA_GEN
#                                                                                                   ,cfg.TRAIN.SMOOTH.LAMBDA_CAP
#                                                                                                   ,cfg.TRAIN.SMOOTH.LAMBDA_FT
#                                                                                                   ,cfg.TRAIN.SMOOTH.LAMBDA_FI                                                                                                  
#                                                                                                   ,cfg.TRAIN.SMOOTH.LAMBDA_DAMSM))
        
        for epoch in range(start_epoch, self.max_epoch):
            
            ##### set everything to trainable ####
            text_encoder.train()
            image_encoder.train()
            ####################################
            
            ####### init loss variables ############          
            s_total_loss0 = 0
            s_total_loss1 = 0
            w_total_loss0 = 0
            w_total_loss1 = 0
            
            s_t_total_loss0 = 0
            s_t_total_loss1 = 0
            w_t_total_loss0 = 0
            w_t_total_loss1 = 0
            
            total_damsm_loss = 0
            total_t_loss = 0
                      
            ####### print out lr of each optimizer before training starts, make sure lrs are correct #########
            print('Learning rates: lr_i %.7f, lr_t %.7f'
                  % (optimizerI.param_groups[0]['lr'], optimizerT.param_groups[0]['lr']))
                     
            #########################################################################################
            
            start_t = time.time()

            data_iter = iter(self.data_loader)
#             step = 0
            pbar = tqdm(range(self.num_batches))
            for step in pbar: 
#             while step < self.num_batches:
                ######################################################
                # (1) Prepare training data and Compute text embeddings
                ######################################################
                imgs, captions, masks, class_ids, cap_lens = next(data_iter)
                class_ids = class_ids.numpy()
                
                ids = np.array(list(range(batch_size)))
                neg_ids = Variable(torch.LongTensor([np.random.choice(ids[ids!=x]) for x in ids])) # used for matching loss
                
                if cfg.CUDA:
                    imgs, captions, masks, cap_lens = imgs.cuda(), captions.cuda(), masks.cuda(), cap_lens.cuda()
                    neg_ids = neg_ids.cuda()
                # add images, image masks, captions, caption masks for catr model
                
                ################## feedforward damsm model ##################
                image_encoder.zero_grad() # image/text encoders zero_grad here
                text_encoder.zero_grad()
                
                words_features, sent_code = image_encoder(imgs) # input images to image encoder, feedforward
                nef, att_sze = words_features.size(1), words_features.size(2)
                # hidden = text_encoder.init_hidden(batch_size)
                # words_embs: batch_size x nef x seq_len
                # sent_emb: batch_size x nef
                words_embs, sent_emb = text_encoder(captions, masks) 
                
#                 #### damsm losses
#                 w_loss0, w_loss1, attn_maps = words_loss(words_features, words_embs[:,:,1:], labels, cap_lens-1, class_ids, batch_size)
#                 w_total_loss0 += w_loss0.item()
#                 w_total_loss1 += w_loss1.item()
#                 damsm_loss = w_loss0 + w_loss1
                
#                 s_loss0, s_loss1 = sent_loss(sent_code, sent_emb, labels, class_ids, batch_size)
#                 s_total_loss0 += s_loss0.item()
#                 s_total_loss1 += s_loss1.item()
#                 damsm_loss += s_loss0 + s_loss1
                
#                 total_damsm_loss += damsm_loss.item()
                
#                 #### triplet loss
                s_t_loss0, s_t_loss1 = sent_triplet_loss(sent_code, sent_emb, labels, neg_ids, batch_size)
                s_t_total_loss0 += s_t_loss0.item()
                s_t_total_loss1 += s_t_loss1.item()
                t_loss = s_t_loss0 + s_t_loss1
                
#                 w_t_loss0, w_t_loss1, attn_maps = words_triplet_loss(words_features,words_embs[:,:,1:], labels, neg_ids, cap_lens-1, batch_size)
#                 w_t_total_loss0 += w_t_loss0.item()
#                 w_t_total_loss1 += w_t_loss1.item()
#                 t_loss += w_t_loss0 + w_t_loss1
                
                total_t_loss += t_loss.item()
                ############################################################################
                
                
                
#                 damsm_loss.backward()
                t_loss.backward()
    
                torch.nn.utils.clip_grad_norm_(image_encoder.parameters(), cfg.clip_max_norm)                    
                optimizerI.step()
                
                torch.nn.utils.clip_grad_norm_(text_encoder.parameters(), cfg.clip_max_norm)
                optimizerT.step()
                ##################### loss values for each step #########################################
#                 ## damsm ##
#                 tbw.add_scalar('Train_step/train_w_step_loss0', float(w_loss0.item()), step + epoch * self.num_batches)
#                 tbw.add_scalar('Train_step/train_s_step_loss0', float(s_loss0.item()), step + epoch * self.num_batches)
#                 tbw.add_scalar('Train_step/train_w_step_loss1', float(w_loss1.item()), step + epoch * self.num_batches)
#                 tbw.add_scalar('Train_step/train_s_step_loss1', float(s_loss1.item()), step + epoch * self.num_batches)
#                 tbw.add_scalar('Train_step/train_damsm_step_loss', float(damsm_loss.item()), step + epoch * self.num_batches)

                ## triplet ##
#                 tbw.add_scalar('Train_step/train_w_t_step_loss0', float(w_t_loss0.item()), step + epoch * self.num_batches)
                tbw.add_scalar('Train_step/train_s_t_step_loss0', float(s_t_loss0.item()), step + epoch * self.num_batches)
#                 tbw.add_scalar('Train_step/train_w_t_step_loss1', float(w_t_loss1.item()), step + epoch * self.num_batches)
                tbw.add_scalar('Train_step/train_s_t_step_loss1', float(s_t_loss1.item()), step + epoch * self.num_batches)
                tbw.add_scalar('Train_step/train_t_step_loss', float(t_loss.item()), step + epoch * self.num_batches)

                ################################################################################################    
                
                ############ tqdm descriptions showing running average loss in terminal ##############################
#                 pbar.set_description('damsm %.5f' % ( float(total_damsm_loss) / (step+1)))
                pbar.set_description('triplet %.5f' % ( float(total_t_loss) / (step+1)))
                ######################################################################################################
                ##########################################################
#             v_s_cur_loss, v_w_cur_loss = self.evaluate(image_encoder, text_encoder, self.val_batch_size)
#             print('[epoch: %d] val_w_loss: %.4f, val_s_loss: %.4f' % (epoch, v_w_cur_loss, v_s_cur_loss))
#             ### val losses ###
#             tbw.add_scalar('Val_step/val_w_loss', float(v_w_cur_loss), epoch)
#             tbw.add_scalar('Val_step/val_s_loss', float(v_s_cur_loss), epoch)

            v_s_cur_loss, _ = self.evaluate(image_encoder, text_encoder, self.val_batch_size)
            print('[epoch: %d] val_s_loss: %.4f' % (epoch, v_s_cur_loss))
            ### val losses ###
            tbw.add_scalar('Val_step/val_s_loss', float(v_s_cur_loss), epoch)
            
            lr_schedulerI.step()
            lr_schedulerT.step()
            
            end_t = time.time()
            
            if epoch % cfg.snapshot_interval == 0:
                self.save_model(image_encoder, text_encoder, optimizerI, optimizerT, lr_schedulerI, lr_schedulerT, epoch)                
                
            

        self.save_model(image_encoder, text_encoder, optimizerI, optimizerT, lr_schedulerI, lr_schedulerT, epoch)                
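Example #16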
    def train(self):
        print("Running training for VAE on MNIST dataset")
        config = tf.ConfigProto(allow_soft_placement=True)

        with tf.Session(config=config) as sess:
            self.session = sess
            with tf.device("/gpu:%d" % cfg.GPU_ID):
                counter = self.build_model(sess)
                saver = tf.train.Saver(tf.all_variables(),
                                       keep_checkpoint_every_n_hours=2)
                num_examples = self.dataset.train.num_examples
                updates_per_epoch = num_examples // self.batch_size
                epoch_start = counter // updates_per_epoch
                for epoch in range(epoch_start, 150 + 1):
                    widgets = [
                        "epoch #%d|" % epoch,
                        Percentage(),
                        Bar(),
                        ETA()
                    ]
                    pbar = ProgressBar(maxval=updates_per_epoch,
                                       widgets=widgets)
                    pbar.start()
                    vae_loss = 0
                    for iter in range(updates_per_epoch):
                        input_image, input_y = self.dataset.train.next_batch(
                            self.batch_size)
                        input_image = np.array(input_image)
                        # input_image = input_image.reshape((self.batch_size, 28, 28, 1))
                        feed_dict = {self.input_images: input_image}
                        feed_out = [
                            self.vae_trainer, self.reconstructed_image,
                            self.reconstruction_loss, self.kl_div,
                            self.vae_loss, self.latent_space
                        ]
                        _, rec_img, rec_loss, kl_loss, curr_vae_loss, curr_latent_space = sess.run(
                            feed_out, feed_dict)
                        vae_loss += curr_vae_loss

                        if iter % 500 == 0:
                            # print("Printing type of current latent space: " + str(type(curr_latent_space)))
                            eps = np.random.normal(loc=0,
                                                   scale=1,
                                                   size=(64, 100))
                            # curr_latent_space = curr_latent_space + eps
                            curr_feed_out = [self.reconstructed_image]
                            gen_img = sess.run(
                                curr_feed_out,
                                feed_dict={self.latent_space: eps})[0]

                            gen_img = utils.reshape_and_tile_images(gen_img *
                                                                    255)
                            rec_img = utils.reshape_and_tile_images(rec_img *
                                                                    255)
                            orig_img = utils.reshape_and_tile_images(
                                input_image * 255)
                            gen_img_filename = self.save_dir + "/epoch_%d/%d_gen_img.jpg" % (
                                epoch, iter)
                            rec_img_filename = self.save_dir + "/epoch_%d/%d_rec_img.jpg" % (
                                epoch, iter)
                            orig_img_filename = self.save_dir + "/epoch_%d/%d_orig_img.jpg" % (
                                epoch, iter)
                            utils.mkdir_p(self.save_dir + "/epoch_%d" %
                                          (epoch))
                            cv2.imwrite(rec_img_filename, rec_img)
                            cv2.imwrite(orig_img_filename, orig_img)
                            cv2.imwrite(gen_img_filename, gen_img)
                        counter += 1
                        if counter % self.snapshot_interval == 0:
                            snapshot_path = '%s/%s_%s.ckpt' %\
                                            (self.log_dir,
                                             self.exp_name,
                                             str(counter))
                            # Create the containing directory, not a
                            # directory at the checkpoint path itself.
                            utils.mkdir_p(os.path.dirname(snapshot_path))
                            fn = saver.save(sess, snapshot_path)
                            print("Model saved in file: %s" % fn)
                    vae_loss = vae_loss / updates_per_epoch  # true division: mean loss per update
                    log_line = "%s: %s, %s: %s, %s: %s" % (
                        "vae loss", vae_loss, "reconstruction loss", rec_loss,
                        "kl loss", kl_loss)
                    print("Epoch %d | " % (epoch) + log_line)
                    sys.stdout.flush()
Example #17
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')

    root_log_dir = "output/logs/mnist"
    root_checkpoint_dir = "output/ckt/mnist"
    batch_size = 128
    # updates_per_epoch = 100
    max_epoch = 50

    exp_name = "mnist_%s" % timestamp

    log_dir = os.path.join(root_log_dir, exp_name)
    checkpoint_dir = os.path.join(root_checkpoint_dir, exp_name)

    mkdir_p(log_dir)
    mkdir_p(checkpoint_dir)

    dataset = MnistDataset(batch_size)

    latent_spec = [
        (Uniform(62), False),
        (Categorical(10), True),
        (Uniform(1, fix_std=True), True),
        (Uniform(1, fix_std=True), True),
    ]

    model = RegularizedGAN(output_dist=MeanBernoulli(dataset.image_dim),
                           latent_spec=latent_spec,
                           batch_size=batch_size,
                           image_shape=dataset.image_shape,
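Example #18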
    def train(self):

        now = datetime.datetime.now(dateutil.tz.tzlocal())
        timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
        #     LAMBDA_FT,LAMBDA_FI,LAMBDA_DAMSM=01,50,10
        tb_dir = '../tensorboard/{0}_{1}_{2}'.format(cfg.DATASET_NAME,
                                                     cfg.CONFIG_NAME,
                                                     timestamp)
        mkdir_p(tb_dir)
        tbw = SummaryWriter(log_dir=tb_dir)  # Tensorboard logging

        ####### init models ########
        image_encoder, start_epoch = self.build_models()
        labels = Variable(torch.LongTensor(range(
            self.batch_size)))  # used for matching loss

        image_encoder.train()

        ###############################################################

        ###### init optimizers #####
        optimizerI, lr_schedulerI = self.define_optimizers(image_encoder)
        ############################################

        ##### init data #############################

        match_labels = self.prepare_labels()

        batch_size = self.batch_size
        ##################################################################

        ###### init caption model criterion ############
        if cfg.CUDA:
            labels = labels.cuda()
        #################################################

        tensorboard_step = 0
        gen_iterations = 0
        # gen_iterations = start_epoch * self.num_batches

        #### print lambdas ###
        #         print('LAMBDA_GEN:{0},LAMBDA_CAP:{1},LAMBDA_FT:{2},LAMBDA_FI:{3},LAMBDA_DAMSM:{4}'.format(cfg.TRAIN.SMOOTH.LAMBDA_GEN
        #                                                                                                   ,cfg.TRAIN.SMOOTH.LAMBDA_CAP
        #                                                                                                   ,cfg.TRAIN.SMOOTH.LAMBDA_FT
        #                                                                                                   ,cfg.TRAIN.SMOOTH.LAMBDA_FI
        #                                                                                                   ,cfg.TRAIN.SMOOTH.LAMBDA_DAMSM))

        best_val_loss = 100000.0

        for epoch in range(start_epoch, self.max_epoch):

            ##### set everything to trainable ####
            image_encoder.train()
            total_bce_loss_epoch = 0.0

            ####### print out lr of each optimizer before training starts, make sure lrs are correct #########
            print('Learning rates: lr_i %.7f' %
                  (optimizerI.param_groups[0]['lr']))

            #########################################################################################

            start_t = time.time()

            data_iter = iter(self.data_loader)
            #             step = 0
            pbar = tqdm(range(self.num_batches))

            for step in pbar:
                imgs, classes = next(data_iter)
                if cfg.CUDA:
                    imgs, classes = imgs.cuda(), classes.cuda()
                # add images, image masks, captions, caption masks for catr model

                ################## feedforward classification model ##################
                image_encoder.zero_grad()

                y_pred = image_encoder(
                    imgs)  # input images to image encoder, feedforward
                bce_loss = self.criterion(y_pred, classes)
                bce_loss = bce_loss * self.class_weight_vector
                bce_loss = bce_loss.mean()
                total_bce_loss_epoch += bce_loss.item()

                bce_loss.backward()

                torch.nn.utils.clip_grad_norm_(image_encoder.parameters(),
                                               cfg.clip_max_norm)
                optimizerI.step()
                ##################### loss values for each step #########################################
                tbw.add_scalar('Train_step/loss',
                               float(total_bce_loss_epoch / (step + 1)),
                               step + epoch * self.num_batches)
                ################################################################################################

                ############ tqdm descriptions showing running average loss in terminal ##############################
                #                 pbar.set_description('damsm %.5f' % ( float(total_damsm_loss) / (step+1)))
                pbar.set_description(
                    'loss %.5f' % (float(total_bce_loss_epoch) / (step + 1)))
                ######################################################################################################
                ##########################################################
            v_loss, auc_scores = self.evaluate(image_encoder,
                                               self.val_batch_size)
            print('[epoch: %d] val_loss: %.4f' % (epoch, v_loss))
            print('-' * 80)
            ### val losses ###
            tbw.add_scalar('Val_step/loss', v_loss, epoch)
            for idx in range(len(auc_scores)):
                tbw.add_scalar(
                    'Val_step/{0}'.format(
                        self.data_loader.dataset.idx_to_class[idx]),
                    auc_scores[idx], epoch)

            lr_schedulerI.step()
            end_t = time.time()

            if v_loss < best_val_loss:
                best_val_loss = v_loss
                self.save_model(image_encoder, optimizerI, lr_schedulerI,
                                epoch)
Example #19
    pprint.pprint(cfg)

    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')

    datadir = 'Data/%s' % cfg.DATASET_NAME
    dataset = TextDataset(datadir, cfg.EMBEDDING_TYPE, 1)
    filename_test = '%s/test' % (datadir)
    dataset.test = dataset.get_data(filename_test)
    if cfg.TRAIN.FLAG:
        filename_train = '%s/train' % (datadir)
        dataset.train = dataset.get_data(filename_train)

        ckt_logs_dir = "ckt_logs/%s/%s_%s" % \
            (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
        mkdir_p(ckt_logs_dir)
    else:
        s_tmp = cfg.TRAIN.PRETRAINED_MODEL
        ckt_logs_dir = s_tmp[:s_tmp.find('.ckpt')]

    model = CondGAN(
        image_shape=dataset.image_shape
    )

    algo = CondGANTrainer(
        model=model,
        dataset=dataset,
        ckt_logs_dir=ckt_logs_dir
    )
    if cfg.TRAIN.FLAG:
        algo.train()
Example #20
    print('Using config:')
    pprint.pprint(cfg)

    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')

    datadir = 'Data/%s' % cfg.DATASET_NAME
    dataset = TextDataset(datadir, cfg.EMBEDDING_TYPE, 1)
    filename_test = '%s/test' % (datadir)
    dataset.test = dataset.get_data(filename_test)
    if cfg.TRAIN.FLAG:
        filename_train = '%s/train' % (datadir)
        dataset.train = dataset.get_data(filename_train)
        ckt_logs_dir = "ckt_logs/%s/%s_%s" % (cfg.DATASET_NAME,
                                              cfg.CONFIG_NAME, timestamp)
        mkdir_p(ckt_logs_dir)
    else:
        s_tmp = cfg.TRAIN.PRETRAINED_MODEL
        ckt_logs_dir = s_tmp[:s_tmp.find('.ckpt')]

    model = CondGAN(image_shape=dataset.image_shape)
    algo = CondGANTrainer(model=model,
                          dataset=dataset,
                          ckt_logs_dir=ckt_logs_dir)
    if cfg.TRAIN.FLAG:
        algo.train()
    else:
        ''' For every input text embedding/sentence in the
        training and test datasets, generate cfg.TRAIN.NUM_COPY
        images with randomness from noise z and conditioning augmentation.'''
        algo.evaluate()
Example #21
    pprint.pprint(cfg)

    ## now = datetime.datetime.now(dateutil.tz.tzlocal())
    ## timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')

    datadir = args.dataset_dir
    dataset = TextDataset(datadir, cfg.EMBEDDING_TYPE, 1)
    filename_test = '%s/test' % (datadir)
    dataset.test = dataset.get_data(filename_test)
    if cfg.TRAIN.FLAG:
        filename_train = '%s/train' % (datadir)
        dataset.train = dataset.get_data(filename_train)

        ckt_logs_dir = "ckt_logs/%s/%s" % \
            (cfg.DATASET_NAME, cfg.CONFIG_NAME)
        mkdir_p(ckt_logs_dir)
        models_dir = "models/%s/%s" % \
            (cfg.DATASET_NAME, cfg.CONFIG_NAME)
        mkdir_p(models_dir)
    else:
        s_tmp = cfg.TRAIN.PRETRAINED_MODEL
        ckt_logs_dir = s_tmp[:s_tmp.find('.ckpt')]
        # models_dir is passed to CondGANTrainer below regardless of the
        # branch taken; fall back to the checkpoint dir here (an assumption,
        # since the original defined models_dir only in the training branch).
        models_dir = ckt_logs_dir

    model = CondGAN(image_shape=dataset.image_shape)

    algo = CondGANTrainer(model=model,
                          dataset=dataset,
                          ckt_logs_dir=ckt_logs_dir,
                          models_dir=models_dir)
    if cfg.TRAIN.FLAG:
        algo.train()
Example #22
    def save_super_images(self, images, sample_batchs, hr_sample_batchs,
                          savenames, captions_batchs,
                          sentenceID, save_dir, subset):
        # batch_size samples for each embedding
        # Up to 16 samples for each text embedding/sentence
        numSamples = len(sample_batchs)
        for j in range(len(savenames)):
            s_tmp = '%s-1real-%dsamples/%s/%s' %\
                (save_dir, numSamples, subset, savenames[j])
            folder = s_tmp[:s_tmp.rfind('/')]
            if not os.path.isdir(folder):
                print('Make a new folder: ', folder)
                mkdir_p(folder)

            # First row with up to 8 samples
            real_img = (images[j] + 1.0) * 127.5
            img_shape = real_img.shape
            padding0 = np.zeros(img_shape)
            padding = np.zeros((img_shape[0], 20, 3))

            row1 = [padding0, real_img, padding]
            row2 = [padding0, real_img, padding]
            for i in range(np.minimum(8, numSamples)):
                lr_img = sample_batchs[i][j]
                hr_img = hr_sample_batchs[i][j]
                hr_img = (hr_img + 1.0) * 127.5
                re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
                row1.append(re_sample)
                row2.append(hr_img)
            row1 = np.concatenate(row1, axis=1)
            row2 = np.concatenate(row2, axis=1)
            superimage = np.concatenate([row1, row2], axis=0)

            # Second row with up to 8 samples
            if len(sample_batchs) > 8:
                row1 = [padding0, real_img, padding]
                row2 = [padding0, real_img, padding]
                for i in range(8, len(sample_batchs)):
                    lr_img = sample_batchs[i][j]
                    hr_img = hr_sample_batchs[i][j]
                    hr_img = (hr_img + 1.0) * 127.5
                    re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
                    row1.append(re_sample)
                    row2.append(hr_img)
                row1 = np.concatenate(row1, axis=1)
                row2 = np.concatenate(row2, axis=1)
                super_row = np.concatenate([row1, row2], axis=0)
                superimage2 = np.zeros_like(superimage)
                superimage2[:super_row.shape[0],
                            :super_row.shape[1],
                            :super_row.shape[2]] = super_row
                mid_padding = np.zeros((64, superimage.shape[1], 3))
                superimage = np.concatenate([superimage, mid_padding,
                                             superimage2], axis=0)

            top_padding = np.zeros((128, superimage.shape[1], 3))
            superimage =\
                np.concatenate([top_padding, superimage], axis=0)

            captions = captions_batchs[j][sentenceID]
            fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID)
            superimage = self.drawCaption(np.uint8(superimage), captions)
            scipy.misc.imsave(fullpath, superimage)
Example #23
        d.text((256, 10), caption, font=fnt, fill=(255, 255, 255, 255))
    else:
        cap1 = caption[:idx]
        cap2 = caption[idx + 1:]
        d.text((256, 10), cap1, font=fnt, fill=(255, 255, 255, 255))
        d.text((256, 60), cap2, font=fnt, fill=(255, 255, 255, 255))

    return img_txt


def save_super_images(sample_batchs, hr_sample_batchs,
                      captions_batch, batch_size,
                      startID, save_dir):
    if not os.path.isdir(save_dir):
        print('Make a new folder: ', save_dir)
        mkdir_p(save_dir)

    # Save up to 16 samples for each text embedding/sentence
    img_shape = hr_sample_batchs[0][0].shape
    for j in range(batch_size):
        padding = np.zeros(img_shape)
        row1 = [padding]
        row2 = [padding]
        # First row with up to 8 samples
        for i in range(np.minimum(8, len(sample_batchs))):
            lr_img = sample_batchs[i][j]
            hr_img = hr_sample_batchs[i][j]
            hr_img = (hr_img + 1.0) * 127.5
            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
            row1.append(re_sample)
            row2.append(hr_img)