예제 #1
0
    def sample(self, datapath, stage=1):
        if stage == 1:
            netG, _ = self.load_network_stageI()
        else:
            netG, _ = self.load_network_stageII()
        netG.eval()

        # Load text embeddings generated from the encoder
        t_file = torchfile.load(datapath)
        captions_list = t_file.raw_txt
        embeddings = np.concatenate(t_file.fea_txt, axis=0)
        num_embeddings = len(captions_list)
        print('Successfully load sentences from: ', datapath)
        print('Total number of sentences:', num_embeddings)
        print('num_embeddings:', num_embeddings, embeddings.shape)
        # path to save generated samples
        save_dir = cfg.NET_G[:cfg.NET_G.find('.pth')]
        mkdir_p(save_dir)

        batch_size = np.minimum(num_embeddings, self.batch_size)
        nz = cfg.Z_DIM
        noise = Variable(torch.FloatTensor(batch_size, nz))
        if cfg.CUDA:
            noise = noise.cuda()
        count = 0
        while count < num_embeddings:
            if count > 3000:
                break
            iend = count + batch_size
            if iend > num_embeddings:
                iend = num_embeddings
                count = num_embeddings - batch_size
            embeddings_batch = embeddings[count:iend]
            # captions_batch = captions_list[count:iend]
            txt_embedding = Variable(torch.FloatTensor(embeddings_batch))
            if cfg.CUDA:
                txt_embedding = txt_embedding.cuda()

            #######################################################
            # (2) Generate fake images
            ######################################################
            noise.data.normal_(0, 1)
            inputs = (txt_embedding, noise)
            _, fake_imgs, mu, logvar = \
                nn.parallel.data_parallel(netG, inputs, self.gpus)
            for i in range(batch_size):
                save_name = '%s/%d.png' % (save_dir, count + i)
                im = fake_imgs[i].data.cpu().numpy()
                im = (im + 1.0) * 127.5
                im = im.astype(np.uint8)
                # print('im', im.shape)
                im = np.transpose(im, (1, 2, 0))
                # print('im', im.shape)
                im = Image.fromarray(im)
                im.save(save_name)
            count += batch_size
예제 #2
0
    def evaluate(self, split_dir):
        if cfg.TRAIN.NET_G == '':
            print('Error: the path for morels is not found!')
        else:
            # Build and load the generator
            netG = G_NET()
            netG.apply(weights_init)
            netG = torch.nn.DataParallel(netG, device_ids=self.gpus)
            print(netG)
            # state_dict = torch.load(cfg.TRAIN.NET_G)
            state_dict = \
                torch.load(cfg.TRAIN.NET_G,
                           map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dict)
            print('Load ', cfg.TRAIN.NET_G)

            # the path to save generated images
            s_tmp = cfg.TRAIN.NET_G
            istart = s_tmp.rfind('_') + 1
            iend = s_tmp.rfind('.')
            iteration = int(s_tmp[istart:iend])
            s_tmp = s_tmp[:s_tmp.rfind('/')]
            save_dir = '%s/iteration%d/%s' % (s_tmp, iteration, split_dir)
            if cfg.TEST.B_EXAMPLE:
                folder = '%s/super' % (save_dir)
            else:
                folder = '%s/single' % (save_dir)
            print('Make a new folder: ', folder)
            mkdir_p(folder)

            nz = cfg.GAN.Z_DIM
            noise = Variable(torch.FloatTensor(self.batch_size, nz))
            if cfg.CUDA:
                netG.cuda()
                noise = noise.cuda()

            # switch to evaluate mode
            netG.eval()
            num_batches = int(cfg.TEST.SAMPLE_NUM / self.batch_size)
            cnt = 0
            for step in xrange(num_batches):
                noise.data.normal_(0, 1)
                fake_imgs, _, _ = netG(noise)
                if cfg.TEST.B_EXAMPLE:
                    self.save_superimages(fake_imgs[-1], folder, cnt, 256)
                else:
                    self.save_singleimages(fake_imgs[-1], folder, cnt, 256)
                    # self.save_singleimages(fake_imgs[-2], folder, 128)
                    # self.save_singleimages(fake_imgs[-3], folder, 64)
                cnt += self.batch_size
예제 #3
0
    def save_singleimages(self, images, filenames,
                          save_dir, split_dir, sentenceID, imsize):
        for i in range(images.size(0)):
            s_tmp = '%s/single_samples/%s/%s' %\
                (save_dir, split_dir, filenames[i])
            folder = s_tmp[:s_tmp.rfind('/')]
            if not os.path.isdir(folder):
                print('Make a new folder: ', folder)
                mkdir_p(folder)

            fullpath = '%s_%d_sentence%d.png' % (s_tmp, imsize, sentenceID)
            # range from [-1, 1] to [0, 255]
            img = images[i].add(1).div(2).mul(255).clamp(0, 255).byte()
            ndarr = img.permute(1, 2, 0).data.cpu().numpy()
            im = Image.fromarray(ndarr)
            im.save(fullpath)
예제 #4
0
 def save_superimages(self, images_list, filenames,
                      save_dir, split_dir, imsize):
     batch_size = images_list[0].size(0)
     num_sentences = len(images_list)
     for i in range(batch_size):
         s_tmp = '%s/super/%s/%s' %\
             (save_dir, split_dir, filenames[i])
         folder = s_tmp[:s_tmp.rfind('/')]
         if not os.path.isdir(folder):
             print('Make a new folder: ', folder)
             mkdir_p(folder)
         #
         savename = '%s_%d.png' % (s_tmp, imsize)
         super_img = []
         for j in range(num_sentences):
             img = images_list[j][i]
             # print(img.size())
             img = img.view(1, 3, imsize, imsize)
             # print(img.size())
             super_img.append(img)
             # break
         super_img = torch.cat(super_img, 0)
         vutils.save_image(super_img, savename, nrow=10, normalize=True)
예제 #5
0
    def __init__(self, output_dir):
        if cfg.TRAIN.FLAG:
            self.model_dir = os.path.join(output_dir, 'Model')
            self.image_dir = os.path.join(output_dir, 'Image')
            self.log_dir = os.path.join(output_dir, 'Log')
            mkdir_p(self.model_dir)
            mkdir_p(self.image_dir)
            mkdir_p(self.log_dir)
            self.summary_writer = FileWriter(self.log_dir)

        self.max_epoch = cfg.TRAIN.MAX_EPOCH
        self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL

        s_gpus = cfg.GPU_ID.split(',')
        self.gpus = [int(ix) for ix in s_gpus]
        self.num_gpus = len(self.gpus)
        self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
        torch.cuda.set_device(self.gpus[0])
        cudnn.benchmark = True
예제 #6
0
    def __init__(self, output_dir, data_loader, imsize):
        if cfg.TRAIN.FLAG:
            self.model_dir = os.path.join(output_dir, 'Model')
            self.image_dir = os.path.join(output_dir, 'Image')
            self.log_dir = os.path.join(output_dir, 'Log')
            mkdir_p(self.model_dir)
            mkdir_p(self.image_dir)
            mkdir_p(self.log_dir)
            self.summary_writer = FileWriter(self.log_dir)

        s_gpus = cfg.GPU_ID.split(',')
        self.gpus = [int(ix) for ix in s_gpus]
        self.num_gpus = len(self.gpus)
        torch.cuda.set_device(self.gpus[0])
        cudnn.benchmark = True

        self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
        self.max_epoch = cfg.TRAIN.MAX_EPOCH
        self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL

        self.data_loader = data_loader
        self.num_batches = len(self.data_loader)
예제 #7
0
    def __init__(self, output_dir):
        if cfg.TRAIN.FLAG:
            self.model_dir = os.path.join(output_dir, 'Model')
            self.image_dir = os.path.join(output_dir, 'Image')
            self.log_dir = os.path.join(output_dir, 'Log')
            mkdir_p(self.model_dir)
            mkdir_p(self.image_dir)
            mkdir_p(self.log_dir)
            self.summary_writer = FileWriter(self.log_dir)

        self.max_epoch = cfg.TRAIN.MAX_EPOCH
        self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL

        s_gpus = cfg.GPU_ID.split(',')
        self.gpus = [int(ix) for ix in s_gpus]
        self.num_gpus = len(self.gpus)
        self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
        torch.cuda.set_device(self.gpus[0])
        cudnn.benchmark = True

        #path = "../data/birds/birds.en.vec"
        path = os.path.join(cfg.DATA_DIR, cfg.DATASET_NAME + ".en.vec")
        txt_dico, _txt_emb = load_external_embeddings(path)
        #params.src_dico = src_dico
        txt_emb = nn.Embedding(len(txt_dico), 300, sparse=False)
        txt_emb.weight.data.copy_(_txt_emb)
        txt_emb.weight.requires_grad = False
        self.txt_dico = txt_dico
        self.txt_emb = txt_emb

        self.vis = visdom.Visdom(server='http://bvisionserver9.cs.unc.edu',
                                 port=8088,
                                 env="birds_spv2")
        self.vis_win1 = self.vis.images(np.ones((64, 3, 64, 64)))
        self.vis_win2 = self.vis.images(np.ones((64, 3, 64, 64)))
        self.vis_win3 = self.vis.images(np.ones((64, 3, 64, 64)))
        self.vis_txt1 = self.vis.text('')
예제 #8
0
파일: trainer.py 프로젝트: zxs789/Obj-GAN
    def __init__(self, output_dir, data_loader, dataset):
        if cfg.TRAIN.FLAG:
            self.model_dir = os.path.join(output_dir, 'Model')
            self.image_dir = os.path.join(output_dir, 'Image')
            self.score_dir = os.path.join(output_dir, 'Score')
            mkdir_p(self.model_dir)
            mkdir_p(self.image_dir)
            mkdir_p(self.score_dir)

        if len(cfg.GPU_IDS) == 1 and cfg.GPU_IDS[0] >= 0:
            torch.cuda.set_device(0)
        cudnn.benchmark = True

        self.batch_size = cfg.TRAIN.BATCH_SIZE
        self.max_epoch = cfg.TRAIN.MAX_EPOCH
        self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL
        self.print_interval = cfg.TRAIN.PRINT_INTERVAL
        self.display_interval = cfg.TRAIN.DISPLAY_INTERVAL

        self.n_words = dataset.n_words
        self.ixtoword = dataset.ixtoword
        self.cats_dict = dataset.cats_dict
        self.cats_index_dict = dataset.cats_index_dict

        self.data_loader = data_loader
        self.num_batches = len(self.data_loader)

        self.device = torch.device("cuda" if cfg.CUDA else "cpu")

        self.vgg_model = vgg19_bn(pretrained=True)
        if cfg.CUDA:
            self.vgg_model = self.vgg_model.cuda()
            if len(cfg.GPU_IDS) > 1:
                self.vgg_model = nn.DataParallel(self.vgg_model)
                self.vgg_model.to(self.device)
        self.vgg_model.eval()
예제 #9
0
    def genDiscOutputs(self, split_dir, num_samples=57140):
        if cfg.TRAIN.NET_G == '':
            logger.error('Error: the path for morels is not found!')
        else:
            if split_dir == 'test':
                split_dir = 'valid'
            # Build and load the generator
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET()
            netG.apply(weights_init)
            netG.to(cfg.DEVICE)
            netG.eval()
            #
            text_encoder = RNN_ENCODER(self.n_words,
                                       nhidden=cfg.TEXT.EMBEDDING_DIM)  ###HACK
            state_dict = torch.load(cfg.TRAIN.NET_E,
                                    map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            text_encoder = text_encoder.to(cfg.DEVICE)
            text_encoder.eval()
            logger.info('Loaded text encoder from: %s', cfg.TRAIN.NET_E)

            batch_size = self.batch_size[0]
            nz = cfg.GAN.GLOBAL_Z_DIM
            noise = Variable(torch.FloatTensor(batch_size, nz)).to(cfg.DEVICE)
            local_noise = Variable(
                torch.FloatTensor(batch_size,
                                  cfg.GAN.LOCAL_Z_DIM)).to(cfg.DEVICE)

            model_dir = cfg.TRAIN.NET_G
            state_dict = torch.load(model_dir,
                                    map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dict["netG"])
            for keys in state_dict.keys():
                print(keys)
            logger.info('Load G from: %s', model_dir)
            max_objects = 3
            from model import D_NET256
            netD = D_NET256()
            netD.load_state_dict(state_dict["netD"][2])

            netD.eval()

            # the path to save generated images
            s_tmp = model_dir[:model_dir.rfind('.pth')].split("/")[-1]
            save_dir = '%s/%s/%s' % ("../output", s_tmp, split_dir)
            mkdir_p(save_dir)
            logger.info("Saving images to: {}".format(save_dir))

            number_batches = num_samples // batch_size
            if number_batches < 1:
                number_batches = 1

            data_iter = iter(self.data_loader)
            real_labels, fake_labels, match_labels = self.prepare_labels()

            for step in tqdm(range(number_batches)):
                data = data_iter.next()

                imgs, captions, cap_lens, class_ids, keys, transformation_matrices, label_one_hot, _ = prepare_data(
                    data, eval=True)

                transf_matrices = transformation_matrices[0]
                transf_matrices_inv = transformation_matrices[1]

                hidden = text_encoder.init_hidden(batch_size)
                # words_embs: batch_size x nef x seq_len
                # sent_emb: batch_size x nef
                words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
                words_embs, sent_emb = words_embs.detach(), sent_emb.detach()
                mask = (captions == 0)
                num_words = words_embs.size(2)
                if mask.size(1) > num_words:
                    mask = mask[:, :num_words]

                #######################################################
                # (2) Generate fake images
                ######################################################
                noise.data.normal_(0, 1)
                local_noise.data.normal_(0, 1)
                inputs = (noise, local_noise, sent_emb, words_embs, mask,
                          transf_matrices, transf_matrices_inv, label_one_hot,
                          max_objects)
                inputs = tuple(
                    (inp.to(cfg.DEVICE) if isinstance(inp, torch.Tensor
                                                      ) else inp)
                    for inp in inputs)
                with torch.no_grad():
                    fake_imgs, _, mu, logvar = netG(*inputs)
                    inputs = (fake_imgs, fake_labels, transf_matrices,
                              transf_matrices_inv, max_objects)
                    codes = netsD[-1].partial_forward(*inputs)
    def sampling(self, split_dir):
        if cfg.TRAIN.NET_G == '':
            print('Error: the path for morels is not found!')
        else:
            if split_dir == 'test':
                split_dir = 'valid'
            # Build and load the generator
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET()
            netG.apply(weights_init)
            netG.cuda()
            netG.eval()

            # load text encoder
            text_encoder = RNN_ENCODER(self.n_words,
                                       nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = torch.load(cfg.TRAIN.NET_E,
                                    map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()

            #load image encoder
            image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
            img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder',
                                                       'image_encoder')
            state_dict = torch.load(img_encoder_path,
                                    map_location=lambda storage, loc: storage)
            image_encoder.load_state_dict(state_dict)
            print('Load image encoder from:', img_encoder_path)
            image_encoder = image_encoder.cuda()
            image_encoder.eval()

            batch_size = self.batch_size
            nz = cfg.GAN.Z_DIM
            noise = Variable(torch.FloatTensor(batch_size, nz), volatile=True)
            noise = noise.cuda()

            model_dir = cfg.TRAIN.NET_G
            state_dict = torch.load(model_dir,
                                    map_location=lambda storage, loc: storage)
            # state_dict = torch.load(cfg.TRAIN.NET_G)
            netG.load_state_dict(state_dict)
            print('Load G from: ', model_dir)

            # the path to save generated images
            s_tmp = model_dir[:model_dir.rfind('.pth')]
            save_dir = '%s/%s' % (s_tmp, split_dir)
            mkdir_p(save_dir)

            cnt = 0
            R_count = 0
            R = np.zeros(30000)
            cont = True
            for ii in range(11):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
                if (cont == False):
                    break
                for step, data in enumerate(self.data_loader, 0):
                    cnt += batch_size
                    if (cont == False):
                        break
                    if step % 100 == 0:
                        print('cnt: ', cnt)
                    # if step > 50:
                    #     break

                    imgs, captions, cap_lens, class_ids, keys = prepare_data(
                        data)

                    hidden = text_encoder.init_hidden(batch_size)
                    # words_embs: batch_size x nef x seq_len
                    # sent_emb: batch_size x nef
                    words_embs, sent_emb = text_encoder(
                        captions, cap_lens, hidden)
                    words_embs, sent_emb = words_embs.detach(
                    ), sent_emb.detach()
                    mask = (captions == 0)
                    num_words = words_embs.size(2)
                    if mask.size(1) > num_words:
                        mask = mask[:, :num_words]

                    #######################################################
                    # (2) Generate fake images
                    ######################################################
                    noise.data.normal_(0, 1)
                    fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs,
                                              mask, cap_lens)
                    for j in range(batch_size):
                        s_tmp = '%s/single/%s' % (save_dir, keys[j])
                        folder = s_tmp[:s_tmp.rfind('/')]
                        if not os.path.isdir(folder):
                            #print('Make a new folder: ', folder)
                            mkdir_p(folder)
                        k = -1
                        # for k in range(len(fake_imgs)):
                        im = fake_imgs[k][j].data.cpu().numpy()
                        # [-1, 1] --> [0, 255]
                        im = (im + 1.0) * 127.5
                        im = im.astype(np.uint8)
                        im = np.transpose(im, (1, 2, 0))
                        im = Image.fromarray(im)
                        fullpath = '%s_s%d_%d.png' % (s_tmp, k, ii)
                        im.save(fullpath)

                    _, cnn_code = image_encoder(fake_imgs[-1])

                    for i in range(batch_size):
                        mis_captions, mis_captions_len = self.dataset.get_mis_caption(
                            class_ids[i])
                        hidden = text_encoder.init_hidden(99)
                        _, sent_emb_t = text_encoder(mis_captions,
                                                     mis_captions_len, hidden)
                        rnn_code = torch.cat(
                            (sent_emb[i, :].unsqueeze(0), sent_emb_t), 0)
                        ### cnn_code = 1 * nef
                        ### rnn_code = 100 * nef
                        scores = torch.mm(cnn_code[i].unsqueeze(0),
                                          rnn_code.transpose(0, 1))  # 1* 100
                        cnn_code_norm = torch.norm(cnn_code[i].unsqueeze(0),
                                                   2,
                                                   dim=1,
                                                   keepdim=True)
                        rnn_code_norm = torch.norm(rnn_code,
                                                   2,
                                                   dim=1,
                                                   keepdim=True)
                        norm = torch.mm(cnn_code_norm,
                                        rnn_code_norm.transpose(0, 1))
                        scores0 = scores / norm.clamp(min=1e-8)
                        if torch.argmax(scores0) == 0:
                            R[R_count] = 1
                        R_count += 1

                    if R_count >= 30000:
                        sum = np.zeros(10)
                        np.random.shuffle(R)
                        for i in range(10):
                            sum[i] = np.average(R[i * 3000:(i + 1) * 3000 - 1])
                        R_mean = np.average(sum)
                        R_std = np.std(sum)
                        print("R mean:{:.4f} std:{:.4f}".format(R_mean, R_std))
                        cont = False
예제 #11
0
    if cfg.CUDA:
        torch.cuda.manual_seed_all(args.manualSeed)

    ##########################################################################
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')

    isTrainable = 'T'
    layers = 1
    output_dir = '../output/{0}_L{1}_TRX_{2}_{3}_{4}'.format(
        isTrainable, layers, cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)

    model_dir = os.path.join(output_dir, 'Model')
    image_dir = os.path.join(output_dir, 'Image')
    metrics_dir = os.path.join(output_dir, 'Metrics')
    mkdir_p(model_dir)
    mkdir_p(image_dir)
    mkdir_p(metrics_dir)

    torch.cuda.set_device(cfg.GPU_ID)
    cudnn.benchmark = True

    tb_dir = '../tensorboard/{0}_L{1}_TRX_{2}_{3}_{4}'.format(
        isTrainable, layers, cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
    mkdir_p(tb_dir)
    tbw = SummaryWriter(log_dir=tb_dir)  # Tensorboard logging

    # Get data loader ##################################################
    imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1))
    batch_size = cfg.TRAIN.BATCH_SIZE
    image_transform = transforms.Compose([
예제 #12
0
    def sampling(self, split_dir):
        if cfg.TRAIN.NET_G == '' or cfg.TRAIN.NET_C == '':
            print('Error: the path for main module or DCM is not found!')
        else:
            if split_dir == 'test':
                split_dir = 'valid'

            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET()
            netG.apply(weights_init)
            netG.cuda()
            netG.eval()
            # The text encoder
            text_encoder = RNN_ENCODER(self.n_words,
                                       nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = \
              torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()
            # The image encoder
            image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
            img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder',
                                                       'image_encoder')
            state_dict = \
              torch.load(img_encoder_path, map_location=lambda storage, loc: storage)
            image_encoder.load_state_dict(state_dict)
            print('Load image encoder from:', img_encoder_path)
            image_encoder = image_encoder.cuda()
            image_encoder.eval()

            # The VGG network
            VGG = VGGNet()
            print("Load the VGG model")
            VGG.cuda()
            VGG.eval()

            batch_size = self.batch_size
            nz = cfg.GAN.Z_DIM
            noise = Variable(torch.FloatTensor(batch_size, nz), volatile=True)
            noise = noise.cuda()

            # The DCM
            netDCM = DCM_Net()
            if cfg.TRAIN.NET_C != '':
                state_dict = \
                  torch.load(cfg.TRAIN.NET_C, map_location=lambda storage, loc: storage)
                netDCM.load_state_dict(state_dict)
                print('Load DCM from: ', cfg.TRAIN.NET_C)
            netDCM.cuda()
            netDCM.eval()

            model_dir = cfg.TRAIN.NET_G
            state_dict = \
              torch.load(model_dir, map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dict)
            print('Load G from: ', model_dir)

            # the path to save modified images
            s_tmp = model_dir[:model_dir.rfind('.pth')]
            save_dir = '%s/%s' % (s_tmp, split_dir)
            mkdir_p(save_dir)

            cnt = 0
            idx = 0
            for _ in range(5):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
                for step, data in enumerate(self.data_loader, 0):
                    cnt += batch_size
                    if step % 100 == 0:
                        print('step: ', step)

                    imgs, captions, cap_lens, class_ids, keys, wrong_caps, \
                    wrong_caps_len, wrong_cls_id = prepare_data(data)

                    #######################################################
                    # (1) Extract text and image embeddings
                    ######################################################

                    hidden = text_encoder.init_hidden(batch_size)

                    words_embs, sent_emb = text_encoder(
                        wrong_caps, wrong_caps_len, hidden)
                    words_embs, sent_emb = words_embs.detach(
                    ), sent_emb.detach()

                    mask = (wrong_caps == 0)
                    num_words = words_embs.size(2)
                    if mask.size(1) > num_words:
                        mask = mask[:, :num_words]

                    region_features, cnn_code = \
                      image_encoder(imgs[cfg.TREE.BRANCH_NUM - 1])

                    #######################################################
                    # (2) Modify real images
                    ######################################################

                    noise.data.normal_(0, 1)
                    fake_imgs, attention_maps, mu, logvar, h_code, c_code = netG(
                        noise, sent_emb, words_embs, mask, cnn_code,
                        region_features)

                    real_img = imgs[cfg.TREE.BRANCH_NUM - 1]
                    real_features = VGG(real_img)[0]

                    fake_img = netDCM(h_code, real_features, sent_emb, words_embs, \
                                      mask, c_code)
                    for j in range(batch_size):
                        s_tmp = '%s/single' % (save_dir)
                        folder = s_tmp[:s_tmp.rfind('/')]
                        if not os.path.isdir(folder):
                            print('Make a new folder: ', folder)
                            mkdir_p(folder)
                        k = -1
                        im = fake_img[j].data.cpu().numpy()
                        im = (im + 1.0) * 127.5
                        im = im.astype(np.uint8)
                        im = np.transpose(im, (1, 2, 0))
                        im = Image.fromarray(im)
                        fullpath = '%s_s%d.png' % (s_tmp, idx)
                        idx = idx + 1
                        im.save(fullpath)
예제 #13
0
    if args.gpu_id == -1:
        cfg.CUDA = False
    else:
        cfg.GPU_ID = args.gpu_id

    if args.data_dir != '':
        cfg.DATA_DIR = args.data_dir

    assert 'real' in cfg.CONFIG_NAME

    output_dir = '../output/%s_%s' % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME)

    log_dir = output_dir + '/log'
    mkdir_p(output_dir)
    mkdir_p(output_dir + '/imgs')
    mkdir_p(output_dir + '/models')
    mkdir_p(log_dir)

    logger = setup_logger(cfg.CONFIG_NAME, log_dir)
    logger.info('Using config:')
    logger.info(str(cfg))

    if not cfg.TRAIN.FLAG:
        args.manualSeed = 100
    elif args.manualSeed is None:
        args.manualSeed = 100
        #args.manualSeed = random.randint(1, 10000)
    logger.info("seed now is : ", str(args.manualSeed))
예제 #14
0
        args.manualSeed = random.randint(1, 10000)
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if cfg.CUDA:
        torch.cuda.manual_seed_all(args.manualSeed)

    ##########################################################################
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    output_dir = '../output/%s_%s_%s' % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)

    model_dir = os.path.join(output_dir, 'Model')
    image_dir = os.path.join(output_dir, 'Image')
    mkdir_p(model_dir)
    mkdir_p(image_dir)

    torch.cuda.set_device(cfg.GPU_ID)
    cudnn.benchmark = True

    # Get data loader ##################################################
    imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1))
    batch_size = cfg.TRAIN.BATCH_SIZE
    image_transform = transforms.Compose([
        transforms.Resize(int(imsize * 76 / 64)),
        transforms.RandomCrop(imsize),
        transforms.RandomHorizontalFlip()
    ])
    dataset = FaceDataset(cfg.DATA_DIR,
                          'train',
예제 #15
0
def gen_example(n_words, wordtoix, ixtoword, model_dir):
    '''generate images from example sentences'''
    # filepath = 'example_captions.txt'
    filepath = 'caption.txt'
    data_dic = {}
    with open(filepath, "r") as f:
        filenames = f.read().split('\n')

        captions = []
        cap_lens = []

        for sent in filenames:
            if len(sent) == 0:
                continue
            sent = sent.replace("\ufffd\ufffd", " ")
            tokenizer = RegexpTokenizer(r'\w+')
            tokens = tokenizer.tokenize(sent.lower())
            if len(tokens) == 0:
                print('sentence token == 0 !')
                continue

            rev = []
            for t in tokens:
                t = t.encode('ascii', 'ignore').decode('ascii')
                if len(t) > 0 and t in wordtoix:
                    rev.append(wordtoix[t])
            captions.append(rev)
            cap_lens.append(len(rev))

        max_len = np.max(cap_lens)
        sorted_indices = np.argsort(cap_lens)[::-1]
        cap_lens = np.asarray(cap_lens)
        cap_lens = cap_lens[sorted_indices]
        cap_array = np.zeros((len(captions), max_len), dtype='int64')

        for i in range(len(captions)):
            idx = sorted_indices[i]
            cap = captions[idx]
            c_len = len(cap)
            cap_array[i, :c_len] = cap
        # key = name[(name.rfind('/') + 1):]
        key = 0
        data_dic[key] = [cap_array, cap_lens, sorted_indices]
    
    # algo.gen_example(data_dic)
    text_encoder = RNN_ENCODER(n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder.eval()

    netG = G_NET()
    netG.apply(weights_init)
    # netG.cuda()
    netG.eval()
    state_dict = torch.load(model_dir, map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)

    save_dir = 'results/'
    mkdir_p(save_dir)
    for key in data_dic:
        captions, cap_lens, sorted_indices = data_dic[key]

        batch_size = captions.shape[0]
        nz = cfg.GAN.Z_DIM

        with torch.no_grad():
            captions = Variable(torch.from_numpy(captions))
            cap_lens = Variable(torch.from_numpy(cap_lens))

            # captions = captions.cuda()
            # cap_lens = cap_lens.cuda()
        
        for i in range(image_per_caption):  # 16
            with torch.no_grad():
                noise = Variable(torch.FloatTensor(batch_size, nz))
                # noise = noise.cuda()
            
            # (1) Extract text embeddings
            hidden = text_encoder.init_hidden(batch_size)
            words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
            mask = (captions == 0)
            # (2) Generate fake images
            noise.data.normal_(0, 1)
            fake_imgs, attention_maps, _, _ = netG(noise, sent_emb, words_embs, mask, cap_lens)

            cap_lens_np = cap_lens.data.numpy()

            for j in range(batch_size):
                save_name = '%s/%d_%d' % (save_dir, i, sorted_indices[j])
                for k in range(len(fake_imgs)):
                    im = fake_imgs[k][j].data.cpu().numpy()
                    im = (im + 1.0) * 127.5
                    im = im.astype(np.uint8)
                    # print('im', im.shape)
                    im = np.transpose(im, (1, 2, 0))
                    # print('im', im.shape)
                    im = Image.fromarray(im)
                    fullpath = '%s_g%d.png' % (save_name, k)
                    im.save(fullpath)

            for k in range(len(attention_maps)):
                    if len(fake_imgs) > 1:
                        im = fake_imgs[k + 1]
                    else:
                        im = fake_imgs[0]
                    attn_maps = attention_maps[k]
                    att_sze = attn_maps.size(2)
                    img_set, sentences = \
                        build_super_images2(im[j].unsqueeze(0),
                                            captions[j].unsqueeze(0),
                                            [cap_lens_np[j]], ixtoword,
                                            [attn_maps[j]], att_sze)
                    if img_set is not None:
                        im = Image.fromarray(img_set)
                        fullpath = '%s_a%d_attention.png' % (save_name, k)
                        im.save(fullpath)
예제 #16
0
    def sample2(self, datapath, stage=2):
        if stage == 1:
            netG, _, _ = self.load_network_stageI()
        else:
            netG, _, _ = self.load_network_stageII()
        netG.eval()

        # Load text embeddings generated from the encoder
        embeddings = np.load(datapath)
        num_embeddings = embeddings.shape[0]
        print('Successfully load sentences from: ', datapath)
        print('num_embeddings:', num_embeddings, embeddings.shape)
        # path to save generated samples
        save_dir = cfg.NET_G[:cfg.NET_G.find('.pth')]
        mkdir_p(save_dir)

        batch_size = np.minimum(num_embeddings, self.batch_size)
        nz = cfg.Z_DIM
        noise = Variable(torch.FloatTensor(batch_size, nz))
        if cfg.CUDA:
            noise = noise.cuda()
        count = 0

        while count < num_embeddings:
            #             if count > 3000:
            #                 break
            iend = count + batch_size
            if iend > num_embeddings:
                iend = num_embeddings
            embeddings_batch = embeddings[count:iend]
            txt_embedding = Variable(torch.FloatTensor(embeddings_batch))
            if cfg.CUDA:
                txt_embedding = txt_embedding.cuda()

            #######################################################
            # (2) Generate fake images
            #######################################################
            noise.data.normal_(0, 1)
            inputs = (txt_embedding, noise[0:embeddings_batch.shape[0], :])
            lr_fake_imgs, fake_imgs, mu, logvar = \
                nn.parallel.data_parallel(netG, inputs, self.gpus)
            for i in range(embeddings_batch.shape[0]):
                im = fake_imgs[i].data.cpu().numpy()
                im = (im + 1.0) * 127.5
                im = im.astype(np.uint8)
                # print('im', im.shape)
                im = np.transpose(im, (1, 2, 0))
                # print('im', im.shape)
                im = Image.fromarray(im)
                save_name = '%s/%d_%d.png' % (save_dir, im.size[0],
                                              count + i + 1)
                im.save(save_name)
            for i in range(embeddings_batch.shape[0]):
                im = lr_fake_imgs[i].data.cpu().numpy()
                im = (im + 1.0) * 127.5
                im = im.astype(np.uint8)
                # print('im', im.shape)
                im = np.transpose(im, (1, 2, 0))
                # print('im', im.shape)
                im = Image.fromarray(im)
                save_name = '%s/%d_%d.png' % (save_dir, im.size[0],
                                              count + i + 1)
                im.save(save_name)
            count += batch_size
예제 #17
0
    def gen_example(self, data_dic):
        if cfg.TRAIN.NET_G == '' or cfg.TRAIN.NET_C == '':
            print('Error: the path for main module or DCM is not found!')
        else:
            # The text encoder
            text_encoder = \
                RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = \
                torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()

            # The image encoder
            """
            image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
            img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder')
            state_dict = \
                torch.load(img_encoder_path, map_location=lambda storage, loc: storage)
            image_encoder.load_state_dict(state_dict)
            print('Load image encoder from:', img_encoder_path)
            image_encoder = image_encoder.cuda()
            image_encoder.eval()
            """
            """
            image_encoder = CNN_dummy()
            image_encoder = image_encoder.cuda()
            image_encoder.eval()
            """

            # The VGG network
            VGG = VGG16()
            print("Load the VGG model")
            VGG.cuda()
            VGG.eval()

            # The main module
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = EncDecNet()
            s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
            s_tmp = os.path.join(cfg.DATA_DIR, 'output', self.args.netG,
                                 'valid/gen_example')

            model_dir = os.path.join(cfg.DATA_DIR, 'output', self.args.netG,
                                     'Model/netG_epoch_8.pth')
            state_dict = \
                torch.load(model_dir, map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dict)
            print('Load G from: ', model_dir)
            #netG = nn.DataParallel(netG, device_ids= self.gpus)
            netG.cuda()
            netG.eval()

            for key in data_dic:
                save_dir = '%s/%s' % (s_tmp, key)
                mkdir_p(save_dir)
                captions, cap_lens, sorted_indices, imgs = data_dic[key]

                batch_size = captions.shape[0]
                nz = cfg.GAN.Z_DIM
                captions = Variable(torch.from_numpy(captions), volatile=True)
                cap_lens = Variable(torch.from_numpy(cap_lens), volatile=True)

                captions = captions.cuda()
                cap_lens = cap_lens.cuda()
                for i in range(1):
                    noise = Variable(torch.FloatTensor(batch_size, nz),
                                     volatile=True)
                    noise = noise.cuda()

                    #######################################################
                    # (1) Extract text and image embeddings
                    ######################################################
                    hidden = text_encoder.init_hidden(batch_size)

                    # The text embeddings
                    words_embs, sent_emb = text_encoder(
                        captions, cap_lens, hidden)
                    words_embs, sent_emb = words_embs.detach(
                    ), sent_emb.detach()

                    # The image embeddings
                    mask = (captions == 0)
                    #######################################################
                    # (2) Modify real images
                    ######################################################
                    noise.data.normal_(0, 1)

                    imgs_256 = imgs[-1].unsqueeze(0).repeat(
                        batch_size, 1, 1, 1)
                    enc_features = VGG(imgs_256)
                    fake_img, mu, logvar = nn.parallel.data_parallel(
                        netG, (imgs[-1], sent_emb, words_embs, noise, mask,
                               enc_features), self.gpus)

                    cap_lens_np = cap_lens.cpu().data.numpy()

                    one_imgs = []
                    for j in range(captions.shape[0]):
                        font = ImageFont.truetype('./FreeMono.ttf', 20)
                        canv = Image.new('RGB', (256, 256), (255, 255, 255))
                        draw = ImageDraw.Draw(canv)
                        sent = []
                        for k in range(len(captions[j])):
                            if (captions[j][k] == 0):
                                break
                            word = self.ixtoword[captions[j][k].item()].encode(
                                'ascii', 'ignore').decode('ascii')
                            if (k % 2 == 1):
                                word = word + '\n'
                            sent.append(word)
                        fake_sent = ' '.join(sent)
                        draw.text((0, 0), fake_sent, font=font, fill=(0, 0, 0))
                        canv_np = np.asarray(canv)

                        real_im = imgs[-1]
                        real_im = (real_im + 1) * 127.5
                        real_im = real_im.cpu().numpy().astype(np.uint8)
                        real_im = np.transpose(real_im, (1, 2, 0))

                        fake_im = fake_img[j]
                        fake_im = (fake_im + 1.0) * 127.5
                        fake_im = fake_im.detach().cpu().numpy().astype(
                            np.uint8)
                        fake_im = np.transpose(fake_im, (1, 2, 0))

                        one_img = np.concatenate([real_im, canv_np, fake_im],
                                                 axis=1)
                        one_imgs.append(one_img)

                    img_set = np.concatenate(one_imgs, axis=0)
                    super_img = Image.fromarray(img_set)
                    full_path = os.path.join(save_dir, 'super.png')
                    super_img.save(full_path)
                    """
                    for j in range(5): ## batch_size
                        save_name = '%s/%d_s_%d' % (save_dir, i, sorted_indices[j])
                        for k in range(len(fake_imgs)):
                            im = fake_imgs[k][j].data.cpu().numpy()
                            im = (im + 1.0) * 127.5
                            im = im.astype(np.uint8)
                            im = np.transpose(im, (1, 2, 0))
                            im = Image.fromarray(im)
                            fullpath = '%s_g%d.png' % (save_name, k)
                            im.save(fullpath)

                        for k in range(len(attention_maps)):
                            if len(fake_imgs) > 1:
                                im = fake_imgs[k + 1].detach().cpu()
                            else:
                                im = fake_imgs[0].detach().cpu()
                            attn_maps = attention_maps[k]
                            att_sze = attn_maps.size(2)
                    """
                    """
                            img_set, sentences = \
                                build_super_images2(im[j].unsqueeze(0),
                                                    captions[j].unsqueeze(0),
                                                    [cap_lens_np[j]], self.ixtoword,
                                                    [attn_maps[j]], att_sze)
                            if img_set is not None:
                                im = Image.fromarray(img_set)
                                fullpath = '%s_a%d.png' % (save_name, k)
                                im.save(fullpath)
                    """
                    """
예제 #18
0
    def sampling(self, split_dir):
        if cfg.TRAIN.NET_G == '':
            print('Error: the path for main module is not found!')
        else:
            if split_dir == 'test':
                split_dir = 'valid'

            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = EncDecNet()
            netG.apply(weights_init)
            netG.cuda()
            netG.eval()
            # The text encoder
            text_encoder = RNN_ENCODER(self.n_words,
                                       nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = \
                torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()
            # The image encoder
            """
            image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
            img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder')
            state_dict = \
                torch.load(img_encoder_path, map_location=lambda storage, loc: storage)
            image_encoder.load_state_dict(state_dict)
            print('Load image encoder from:', img_encoder_path)
            image_encoder = image_encoder.cuda()
            image_encoder.eval()
            """

            # The VGG network
            VGG = VGG16()
            print("Load the VGG model")
            VGG.cuda()
            VGG.eval()

            batch_size = self.batch_size
            nz = cfg.GAN.Z_DIM
            noise = Variable(torch.FloatTensor(batch_size, nz), volatile=True)
            noise = noise.cuda()

            model_dir = os.path.join(cfg.DATA_DIR, 'output', self.args.netG,
                                     'Model/netG_epoch_600.pth')
            state_dict = \
                torch.load(model_dir, map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dict)
            print('Load G from: ', model_dir)

            # the path to save modified images
            save_dir_valid = os.path.join(cfg.DATA_DIR, 'output',
                                          self.args.netG, 'valid')
            #mkdir_p(save_dir)

            cnt = 0
            idx = 0
            for i in range(5):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
                # the path to save modified images
                save_dir = os.path.join(save_dir_valid, 'valid_%d' % i)
                save_dir_super = os.path.join(save_dir, 'super')
                save_dir_single = os.path.join(save_dir, 'single')
                mkdir_p(save_dir_super)
                mkdir_p(save_dir_single)
                for step, data in enumerate(self.data_loader, 0):
                    cnt += batch_size
                    if step % 100 == 0:
                        print('step: ', step)

                    imgs, w_imgs, captions, cap_lens, class_ids, keys, wrong_caps, \
                                wrong_caps_len, wrong_cls_id = prepare_data(data)

                    #######################################################
                    # (1) Extract text and image embeddings
                    ######################################################

                    hidden = text_encoder.init_hidden(batch_size)

                    words_embs, sent_emb = text_encoder(
                        wrong_caps, wrong_caps_len, hidden)
                    words_embs, sent_emb = words_embs.detach(
                    ), sent_emb.detach()

                    mask = (wrong_caps == 0)
                    num_words = words_embs.size(2)
                    if mask.size(1) > num_words:
                        mask = mask[:, :num_words]

                    #######################################################
                    # (2) Modify real images
                    ######################################################

                    noise.data.normal_(0, 1)

                    fake_img, mu, logvar = netG(imgs[-1], sent_emb, words_embs,
                                                noise, mask, VGG)

                    img_set = build_images(imgs[-1], fake_img, captions,
                                           wrong_caps, self.ixtoword)
                    img = Image.fromarray(img_set)
                    full_path = '%s/super_step%d.png' % (save_dir_super, step)
                    img.save(full_path)

                    for j in range(batch_size):
                        s_tmp = '%s/single' % (save_dir_single)
                        folder = s_tmp[:s_tmp.rfind('/')]
                        if not os.path.isdir(folder):
                            print('Make a new folder: ', folder)
                            mkdir_p(folder)
                        k = -1
                        im = fake_img[j].data.cpu().numpy()
                        #im = (im + 1.0) * 127.5
                        im = im.astype(np.uint8)
                        im = np.transpose(im, (1, 2, 0))
                        im = Image.fromarray(im)
                        fullpath = '%s_s%d.png' % (s_tmp, idx)
                        idx = idx + 1
                        im.save(fullpath)
예제 #19
0
파일: trainer.py 프로젝트: zxs789/Obj-GAN
    def sampling(self, split_dir):
        if cfg.TRAIN.NET_G == '':
            print('Error: the path for morels is not found!')
        else:
            if split_dir == 'test':
                split_dir = 'valid'
            # Build and load the generator
            netG = G_NET(len(self.cats_index_dict))
            netG.apply(weights_init)
            netG.eval()

            if cfg.CUDA:
                netG.cuda()

            if len(cfg.GPU_IDS) > 1:
                netG = nn.DataParallel(netG)
                netG.to(self.device)

            batch_size = self.batch_size
            nz = cfg.GAN.Z_DIM
            noise = Variable(
                torch.FloatTensor(batch_size, cfg.ROI.BOXES_NUM,
                                  len(self.cats_index_dict) * 4))
            noise = noise.cuda()

            model_dir = cfg.TRAIN.NET_G
            state_dict = \
                torch.load(model_dir, map_location=lambda storage, loc: storage)
            # state_dict = torch.load(cfg.TRAIN.NET_G)
            netG.load_state_dict(state_dict)
            print('Load G from: ', model_dir)

            # the path to save generated images
            s_tmp = model_dir[:model_dir.rfind('.pth')]
            save_dir = '%s/%s' % (s_tmp, split_dir)
            mkdir_p(save_dir)

            cnt = 0

            for _ in range(1):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
                for step, data in enumerate(self.data_loader, 0):
                    cnt += batch_size
                    if step % 100 == 0:
                        print('step: ', step)
                    # if step > 50:
                    #     break
                    imgs, pooled_hmaps, hmaps, bbox_maps_fwd, bbox_maps_bwd, bbox_fmaps, \
                        rois, fm_rois, num_rois, class_ids, keys = prepare_data(data)
                    num_rois = num_rois.data.cpu().numpy()

                    cats_list = []
                    for batch_index in range(self.batch_size):
                        cats = []
                        for roi_index in range(num_rois[batch_index]):
                            rela_cat_id = int(rois[batch_index, roi_index, 4])
                            abs_cat_id = self.cats_dict[rela_cat_id][0]
                            cat = self.ixtoword[abs_cat_id].encode(
                                'ascii', 'ignore').decode('ascii')
                            cats.append(cat)
                        cats_list.append(cats)

                    #######################################################
                    # (2) Generate fake images
                    ######################################################
                    max_num_roi = max(num_rois)
                    noise.data.normal_(0, 1)
                    fake_hmaps = netG(noise[:, :max_num_roi], bbox_maps_fwd,
                                      bbox_maps_bwd, bbox_fmaps)
                    fake_hmaps = fake_hmaps.repeat(1, 1, 3, 1, 1)
                    for j in range(batch_size):
                        s_tmp = '%s/single/%s' % (save_dir, keys[j])
                        folder = s_tmp[:s_tmp.rfind('/')]
                        if not os.path.isdir(folder):
                            print('Make a new folder: ', folder)
                            mkdir_p(folder)
                        k = 0
                        # for k in range(len(fake_imgs)):
                        im = fake_hmaps[j][k].data.cpu().numpy()

                        minV = im.min()
                        maxV = im.max()
                        im = (im - minV) / (maxV - minV)
                        im *= 255
                        im = im.astype(np.uint8)
                        im = np.transpose(im, (1, 2, 0))
                        im = Image.fromarray(im)

                        cat = cats_list[j][k]
                        fullpath = '{0}_{1}.png'.format(s_tmp, cat)
                        im.save(fullpath)
예제 #20
0
    def sampling(self, split_dir):
        if cfg.TRAIN.NET_G == '':
            print('Error: the path for morels is not found!')
        else:
            netG_list = [
                '../models/netG_epoch_50.pth', '../models/netG_epoch_60.pth',
                '../models/netG_epoch_70.pth', '../models/netG_epoch_80.pth',
                '../models/netG_epoch_90.pth', '../models/netG_epoch_100.pth',
                '../models/netG_epoch_110.pth', '../models/netG_epoch_120.pth',
                '../models/netG_epoch_130.pth', '../models/netG_epoch_140.pth',
                '../models/netG_epoch_150.pth', '../models/netG_epoch_160.pth'
            ]
            if split_dir == 'test':
                split_dir = 'valid'
            # Build and load the generator
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET()
            netG.apply(weights_init)
            netG.cuda()
            netG.eval()
            #
            text_encoder = RNN_ENCODER(self.n_words,
                                       nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = \
                torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()

            batch_size = self.batch_size
            nz = cfg.GAN.Z_DIM
            noise = Variable(torch.FloatTensor(batch_size, nz), volatile=True)
            noise = noise.cuda()

            model_dir = cfg.TRAIN.NET_G
            state_dict = \
                torch.load(model_dir, map_location=lambda storage, loc: storage)
            # state_dict = torch.load(cfg.TRAIN.NET_G)
            print("LINE==380")
            print("-----------------netG------------------------")
            print(netG)
            print("--------------state-dict---------------------")
            #print(state_dict)
            netG.load_state_dict(state_dict)
            print('Load G from: ', model_dir)
            print(
                '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
            )

            # the path to save generated images
            s_tmp = model_dir[:model_dir.rfind('.pth')]
            save_dir = '%s/%s' % (s_tmp, split_dir)
            print('save_dir:', save_dir)
            mkdir_p(save_dir)

            cnt = 0

            for _ in range(1):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
                for step, data in enumerate(self.data_loader, 0):
                    cnt += batch_size
                    if step % 100 == 0:
                        print('step: ', step)
                    # if step > 50:
                    #     break

                    imgs, captions, cap_lens, class_ids, keys = prepare_data(
                        data)

                    hidden = text_encoder.init_hidden(batch_size)
                    # words_embs: batch_size x nef x seq_len
                    # sent_emb: batch_size x nef
                    words_embs, sent_emb = text_encoder(
                        captions, cap_lens, hidden)
                    words_embs, sent_emb = words_embs.detach(
                    ), sent_emb.detach()
                    mask = (captions == 0)
                    num_words = words_embs.size(2)
                    if mask.size(1) > num_words:
                        mask = mask[:, :num_words]

                    #######################################################
                    # (2) Generate fake images
                    ######################################################
                    noise.data.normal_(0, 1)
                    fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs,
                                              mask)
                    for j in range(batch_size):
                        s_tmp = '%s/single/%s' % (save_dir, keys[j])
                        folder = s_tmp[:s_tmp.rfind('/')]
                        if not os.path.isdir(folder):
                            print('Make a new folder: ', folder)
                            mkdir_p(folder)
                        k = -1
                        # for k in range(len(fake_imgs)):
                        im = fake_imgs[k][j].data.cpu().numpy()
                        # [-1, 1] --> [0, 255]
                        im = (im + 1.0) * 127.5
                        im = im.astype(np.uint8)
                        im = np.transpose(im, (1, 2, 0))
                        im = Image.fromarray(im)
                        fullpath = '%s_s%d.png' % (s_tmp, k)
                        im.save(fullpath)
예제 #21
0
    def gen_example(self, data_dic):
        if cfg.TRAIN.NET_G == '' or cfg.TRAIN.NET_C == '':
            print('Error: the path for main module or DCM is not found!')
        else:
            # The text encoder
            text_encoder = \
              RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = \
              torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()

            # The image encoder
            image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
            img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder',
                                                       'image_encoder')
            state_dict = \
              torch.load(img_encoder_path, map_location=lambda storage, loc: storage)
            image_encoder.load_state_dict(state_dict)
            print('Load image encoder from:', img_encoder_path)
            image_encoder = image_encoder.cuda()
            image_encoder.eval()

            # The VGG network
            VGG = VGGNet()
            print("Load the VGG model")
            VGG.cuda()
            VGG.eval()

            # The main module
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET()
            s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
            model_dir = cfg.TRAIN.NET_G
            state_dict = \
              torch.load(model_dir, map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dict)
            print('Load G from: ', model_dir)
            netG.cuda()
            netG.eval()

            # The DCM
            netDCM = DCM_Net()
            if cfg.TRAIN.NET_C != '':
                state_dict = \
                  torch.load(cfg.TRAIN.NET_C, map_location=lambda storage, loc: storage)
                netDCM.load_state_dict(state_dict)
                print('Load DCM from: ', cfg.TRAIN.NET_C)
            netDCM.cuda()
            netDCM.eval()

            for key in data_dic:
                save_dir = '%s/%s' % (s_tmp, key)
                mkdir_p(save_dir)
                captions, cap_lens, sorted_indices, imgs = data_dic[key]

                batch_size = captions.shape[0]
                nz = cfg.GAN.Z_DIM
                captions = Variable(torch.from_numpy(captions), volatile=True)
                cap_lens = Variable(torch.from_numpy(cap_lens), volatile=True)

                captions = captions.cuda()
                cap_lens = cap_lens.cuda()
                for i in range(1):
                    noise = Variable(torch.FloatTensor(batch_size, nz),
                                     volatile=True)
                    noise = noise.cuda()

                    #######################################################
                    # (1) Extract text and image embeddings
                    ######################################################
                    hidden = text_encoder.init_hidden(batch_size)

                    # The text embeddings
                    words_embs, sent_emb = text_encoder(
                        captions, cap_lens, hidden)

                    # The image embeddings
                    region_features, cnn_code = \
                      image_encoder(imgs[cfg.TREE.BRANCH_NUM - 1].unsqueeze(0))
                    mask = (captions == 0)

                    #######################################################
                    # (2) Modify real images
                    ######################################################
                    noise.data.normal_(0, 1)
                    fake_imgs, attention_maps, mu, logvar, h_code, c_code = netG(
                        noise, sent_emb, words_embs, mask, cnn_code,
                        region_features)

                    real_img = imgs[cfg.TREE.BRANCH_NUM - 1].unsqueeze(0)
                    real_features = VGG(real_img)[0]

                    fake_img = netDCM(h_code, real_features, sent_emb, words_embs, \
                                      mask, c_code)

                    cap_lens_np = cap_lens.cpu().data.numpy()
                    for j in range(batch_size):
                        save_name = '%s/%d_s_%d' % (save_dir, i,
                                                    sorted_indices[j])
                        for k in range(len(fake_imgs)):
                            im = fake_imgs[k][j].data.cpu().numpy()
                            im = (im + 1.0) * 127.5
                            im = im.astype(np.uint8)
                            im = np.transpose(im, (1, 2, 0))
                            im = Image.fromarray(im)
                            fullpath = '%s_g%d.png' % (save_name, k)
                            im.save(fullpath)

                        for k in range(len(attention_maps)):
                            if len(fake_imgs) > 1:
                                im = fake_imgs[k + 1].detach().cpu()
                            else:
                                im = fake_imgs[0].detach().cpu()
                            attn_maps = attention_maps[k]
                            att_sze = attn_maps.size(2)
                            img_set, sentences = \
                              build_super_images2(im[j].unsqueeze(0),
                                                  captions[j].unsqueeze(0),
                                                  [cap_lens_np[j]], self.ixtoword,
                                                  [attn_maps[j]], att_sze)
                            if img_set is not None:
                                im = Image.fromarray(img_set)
                                fullpath = '%s_a%d.png' % (save_name, k)
                                im.save(fullpath)

                        save_name = '%s/%d_sf_%d' % (save_dir, 1,
                                                     sorted_indices[j])
                        im = fake_img[j].data.cpu().numpy()
                        im = (im + 1.0) * 127.5
                        im = im.astype(np.uint8)
                        im = np.transpose(im, (1, 2, 0))
                        im = Image.fromarray(im)
                        fullpath = '%s_SF.png' % (save_name)
                        im.save(fullpath)

                    save_name = '%s/%d_s_%d' % (save_dir, 1, 9)
                    im = imgs[2].data.cpu().numpy()
                    im = (im + 1.0) * 127.5
                    im = im.astype(np.uint8)
                    im = np.transpose(im, (1, 2, 0))
                    im = Image.fromarray(im)
                    fullpath = '%s_SR.png' % (save_name)
                    im.save(fullpath)
예제 #22
0
    def sampling(self, split_dir, model):
        if cfg.TRAIN.NET_G == '':
            print('Error: the path for morels is not found!')
        else:
            if split_dir == 'test':
                split_dir = 'valid'
            # Build and load the generator
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET()
            netG.apply(weights_init)
            if cfg.GPU_ID != -1:
                netG.cuda()
            netG.eval()
            #
            text_encoder = RNN_ENCODER(self.n_words,
                                       nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = \
                torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            if cfg.GPU_ID != -1:
                text_encoder = text_encoder.cuda()
            text_encoder.eval()

            batch_size = self.batch_size
            nz = cfg.GAN.Z_DIM

            with torch.no_grad():
                noise = Variable(torch.FloatTensor(batch_size, nz))
                if cfg.GPU_ID != -1:
                    noise = noise.cuda()

            model_dir = cfg.TRAIN.NET_G
            state_dict = \
                torch.load(model_dir, map_location=lambda storage, loc: storage)
            # state_dict = torch.load(cfg.TRAIN.NET_G)
            netG.load_state_dict(state_dict)
            print('Load G from: ', model_dir)

            # the path to save generated images
            s_tmp = model_dir[:model_dir.rfind('.pth')]
            save_dir = '%s/%s' % (s_tmp, split_dir)
            mkdir_p(save_dir)

            cnt = 0

            #new
            if cfg.TRAIN.CLIP_SENTENCODER:
                print("Use CLIP SentEncoder for sampling")

            for _ in range(1):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
                for step, data in enumerate(self.data_loader, 0):
                    cnt += batch_size
                    if step % 100 == 0:
                        print('step: ', step)
                    # if step > 50:
                    #     break

                    #imgs, captions, cap_lens, class_ids, keys = prepare_data(data)
                    #new
                    imgs, captions, cap_lens, class_ids, keys, texts = prepare_data(
                        data)

                    hidden = text_encoder.init_hidden(batch_size)
                    # words_embs: batch_size x nef x seq_len
                    # sent_emb: batch_size x nef
                    words_embs, sent_emb = text_encoder(
                        captions, cap_lens, hidden)
                    words_embs, sent_emb = words_embs.detach(
                    ), sent_emb.detach()
                    mask = (captions == 0)
                    num_words = words_embs.size(2)
                    if mask.size(1) > num_words:
                        mask = mask[:, :num_words]

                    # new
                    if cfg.TRAIN.CLIP_SENTENCODER:

                        # random select one paragraph for each training example
                        sents = []
                        for idx in range(len(texts)):
                            sents_per_image = texts[idx].split(
                                '\n')  # new 3/11
                            if len(sents_per_image) > 1:
                                sent_ix = np.random.randint(
                                    0,
                                    len(sents_per_image) - 1)
                            else:
                                sent_ix = 0
                            sents.append(sents_per_image[sent_ix])
                            with open('%s/%s' % (save_dir, 'eval_sents.txt'),
                                      'a+') as f:
                                f.write(sents_per_image[sent_ix] + '\n')
                        # print('sents: ', sents)

                        sent = clip.tokenize(sents)  # .to(device)

                        # load clip
                        #model = torch.jit.load("model.pt").cuda().eval()
                        sent_input = sent
                        if cfg.GPU_ID != -1:
                            sent_input = sent.cuda()
                        # print("text input", sent_input)
                        with torch.no_grad():
                            sent_emb = model.encode_text(sent_input).float()

                    #######################################################
                    # (2) Generate fake images
                    ######################################################
                    noise.data.normal_(0, 1)
                    fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs,
                                              mask)
                    for j in range(batch_size):
                        s_tmp = '%s/fake/%s' % (save_dir, keys[j])
                        folder = s_tmp[:s_tmp.rfind('/')]
                        if not os.path.isdir(folder):
                            print('Make a new folder: ', folder)
                            mkdir_p(folder)
                            print('Make a new folder: ', f'{save_dir}/real')
                            mkdir_p(f'{save_dir}/real')
                            print('Make a new folder: ', f'{save_dir}/text')
                            mkdir_p(f'{save_dir}/text')
                        k = -1
                        # for k in range(len(fake_imgs)):
                        im = fake_imgs[k][j].data.cpu().numpy()
                        # [-1, 1] --> [0, 255]
                        im = (im + 1.0) * 127.5
                        im = im.astype(np.uint8)
                        im = np.transpose(im, (1, 2, 0))
                        im = Image.fromarray(im)
                        fullpath = '%s_s%d.png' % (s_tmp, k)
                        im.save(fullpath)
                        temp = keys[j].replace('b', '').replace("'", '')
                        shutil.copy(f"../data/Face/images/{temp}.jpg",
                                    f"{save_dir}/real/")
                        shutil.copy(f"../data/Face/text/{temp}.txt",
                                    f"{save_dir}/text/")
예제 #23
0
    def sample(self, split_dir, num_samples=25, draw_bbox=False):
        from PIL import Image, ImageDraw, ImageFont
        import cPickle as pickle
        import torchvision
        import torchvision.utils as vutils

        if cfg.TRAIN.NET_G == '':
            print('Error: the path for model NET_G is not found!')
        else:
            if split_dir == 'test':
                split_dir = 'valid'
            # Build and load the generator
            text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = \
                torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()

            batch_size = cfg.TRAIN.BATCH_SIZE
            nz = cfg.GAN.Z_DIM

            model_dir = cfg.TRAIN.NET_G
            state_dict = torch.load(model_dir, map_location=lambda storage, loc: storage)
            # state_dict = torch.load(cfg.TRAIN.NET_G)
            netG = G_NET()
            print('Load G from: ', model_dir)
            netG.apply(weights_init)

            netG.load_state_dict(state_dict["netG"])
            netG.cuda()
            netG.eval()

            # the path to save generated images
            s_tmp = model_dir[:model_dir.rfind('.pth')]
            save_dir = '%s_%s' % (s_tmp, split_dir)
            mkdir_p(save_dir)
            #######################################
            noise = Variable(torch.FloatTensor(9, nz))

            imsize = 256

            for step, data in enumerate(self.data_loader, 0):
                if step >= num_samples:
                    break

                imgs, captions, cap_lens, class_ids, keys, transformation_matrices, label_one_hot, bbox = \
                    prepare_data(data, eval=True)
                transf_matrices_inv = transformation_matrices[1][0].unsqueeze(0)
                label_one_hot = label_one_hot[0].unsqueeze(0)

                img = imgs[-1][0]
                val_image = img.view(1, 3, imsize, imsize)

                hidden = text_encoder.init_hidden(batch_size)
                # words_embs: batch_size x nef x seq_len
                # sent_emb: batch_size x nef
                words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
                words_embs, sent_emb = words_embs[0].unsqueeze(0).detach(), sent_emb[0].unsqueeze(0).detach()
                words_embs = words_embs.repeat(9, 1, 1)
                sent_emb = sent_emb.repeat(9, 1)
                mask = (captions == 0)
                mask = mask[0].unsqueeze(0)
                num_words = words_embs.size(2)
                if mask.size(1) > num_words:
                    mask = mask[:, :num_words]
                mask = mask.repeat(9, 1)
                transf_matrices_inv = transf_matrices_inv.repeat(9, 1, 1, 1)
                label_one_hot = label_one_hot.repeat(9, 1, 1)

                #######################################################
                # (2) Generate fake images
                ######################################################
                noise.data.normal_(0, 1)
                inputs = (noise, sent_emb, words_embs, mask, transf_matrices_inv, label_one_hot)
                with torch.no_grad():
                    fake_imgs, _, mu, logvar = nn.parallel.data_parallel(netG, inputs, self.gpus)

                data_img = torch.FloatTensor(10, 3, imsize, imsize).fill_(0)
                data_img[0] = val_image
                data_img[1:10] = fake_imgs[-1]

                if draw_bbox:
                    for idx in range(3):
                        x, y, w, h = tuple([int(imsize*x) for x in bbox[0, idx]])
                        w = imsize-1 if w > imsize-1 else w
                        h = imsize-1 if h > imsize-1 else h
                        if x <= -1:
                            break
                        data_img[:10, :, y, x:x + w] = 1
                        data_img[:10, :, y:y + h, x] = 1
                        data_img[:10, :, y+h, x:x + w] = 1
                        data_img[:10, :, y:y + h, x + w] = 1

                # get caption
                cap = captions[0].data.cpu().numpy()
                sentence = ""
                for j in range(len(cap)):
                    if cap[j] == 0:
                        break
                    word = self.ixtoword[cap[j]].encode('ascii', 'ignore').decode('ascii')
                    sentence += word + " "
                sentence = sentence[:-1]
                vutils.save_image(data_img, '{}/{}_{}.png'.format(save_dir, sentence, step), normalize=True, nrow=10)
            print("Saved {} files to {}".format(step, save_dir))
예제 #24
0
    def sampling(self, split_dir):
        if cfg.TRAIN.NET_G == '':
            print('Error: the path for models is not found!')
        else:
            if split_dir == 'test':
                split_dir = 'valid'
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET()
            netG.apply(weights_init)
            netG.cuda()
            netG.eval()
            #
            text_encoder = RNN_ENCODER(self.n_words,
                                       nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = \
                torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()

            batch_size = self.batch_size
            nz = cfg.GAN.Z_DIM
            noise = Variable(torch.FloatTensor(batch_size, nz), volatile=True)
            noise = noise.cuda()

            model_dir = cfg.TRAIN.NET_G
            state_dict = \
                torch.load(model_dir, map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dict)
            print('Load G from: ', model_dir)

            # the path to save generated images
            s_tmp = model_dir[:model_dir.rfind('.pth')]
            save_dir = '%s/%s' % (s_tmp, split_dir)
            mkdir_p(save_dir)

            cnt = 0
            idx = 0  ###

            avg_ddva = 0
            for _ in range(1):
                for step, data in enumerate(self.data_loader, 0):
                    cnt += batch_size
                    if step % 100 == 0:
                        print('step: ', step)

                    captions, cap_lens, imperfect_captions, imperfect_cap_lens, misc = data

                    # Generate images for human-text ----------------------------------------------------------------
                    data_human = [captions, cap_lens, misc]

                    imgs, captions, cap_lens, class_ids, keys, wrong_caps,\
                                wrong_caps_len, wrong_cls_id= prepare_data(data_human)

                    hidden = text_encoder.init_hidden(batch_size)
                    words_embs, sent_emb = text_encoder(
                        captions, cap_lens, hidden)
                    words_embs, sent_emb = words_embs.detach(
                    ), sent_emb.detach()
                    mask = (captions == 0)
                    num_words = words_embs.size(2)
                    if mask.size(1) > num_words:
                        mask = mask[:, :num_words]

                    noise.data.normal_(0, 1)
                    fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs,
                                              mask)

                    # Generate images for imperfect caption-text-------------------------------------------------------
                    data_imperfect = [
                        imperfect_captions, imperfect_cap_lens, misc
                    ]

                    imgs, imperfect_captions, imperfect_cap_lens, class_ids, imperfect_keys, wrong_caps,\
                                wrong_caps_len, wrong_cls_id = prepare_data(data_imperfect)

                    hidden = text_encoder.init_hidden(batch_size)
                    words_embs, sent_emb = text_encoder(
                        imperfect_captions, imperfect_cap_lens, hidden)
                    words_embs, sent_emb = words_embs.detach(
                    ), sent_emb.detach()
                    mask = (imperfect_captions == 0)
                    num_words = words_embs.size(2)
                    if mask.size(1) > num_words:
                        mask = mask[:, :num_words]

                    noise.data.normal_(0, 1)
                    imperfect_fake_imgs, _, _, _ = netG(
                        noise, sent_emb, words_embs, mask)

                    # Sort the results by keys to align ----------------------------------------------------------------
                    keys, captions, cap_lens, fake_imgs, _, _ = sort_by_keys(
                        keys, captions, cap_lens, fake_imgs, None, None)

                    imperfect_keys, imperfect_captions, imperfect_cap_lens, imperfect_fake_imgs, true_imgs, _ = \
                                sort_by_keys(imperfect_keys, imperfect_captions, imperfect_cap_lens, imperfect_fake_imgs,\
                                             imgs, None)

                    # Shift device for the imgs, target_imgs and imperfect_imgs------------------------------------------------
                    for i in range(len(imgs)):
                        imgs[i] = imgs[i].to(secondary_device)
                        imperfect_fake_imgs[i] = imperfect_fake_imgs[i].to(
                            secondary_device)
                        fake_imgs[i] = fake_imgs[i].to(secondary_device)

                    for j in range(batch_size):
                        s_tmp = '%s/single' % (save_dir)
                        folder = s_tmp[:s_tmp.rfind('/')]
                        if not os.path.isdir(folder):
                            print('Make a new folder: ', folder)
                            mkdir_p(folder)
                        k = -1
                        im = fake_imgs[k][j].data.cpu().numpy()
                        im = (im + 1.0) * 127.5
                        im = im.astype(np.uint8)
                        im = np.transpose(im, (1, 2, 0))

                        cap_im = imperfect_fake_imgs[k][j].data.cpu().numpy()
                        cap_im = (cap_im + 1.0) * 127.5
                        cap_im = cap_im.astype(np.uint8)
                        cap_im = np.transpose(cap_im, (1, 2, 0))

                        # Uncomment to scale true image
                        true_im = true_imgs[k][j].data.cpu().numpy()
                        true_im = (true_im + 1.0) * 127.5
                        true_im = true_im.astype(np.uint8)
                        true_im = np.transpose(true_im, (1, 2, 0))

                        # Uncomment to save images.
                        #true_im = Image.fromarray(true_im)
                        #fullpath = '%s_true_s%d.png' % (s_tmp, idx)
                        #true_im.save(fullpath)
                        im = Image.fromarray(im)
                        fullpath = '%s_s%d.png' % (s_tmp, idx)
                        im.save(fullpath)
                        #cap_im = Image.fromarray(cap_im)
                        #fullpath = '%s_imperfect_s%d.png' % (s_tmp, idx)
                        idx = idx + 1
                        #cap_im.save(fullpath)

                    neg_ddva = negative_ddva(
                        imperfect_fake_imgs,
                        imgs,
                        fake_imgs,
                        reduce='mean',
                        final_only=True).data.cpu().numpy()
                    avg_ddva += neg_ddva * (-1)

                    #text_caps = [[self.ixtoword[word] for word in sent if word!=0] for sent in captions.tolist()]

                    #imperfect_text_caps = [[self.ixtoword[word] for word in sent if word!=0] for sent in
                    #                       imperfect_captions.tolist()]

                    print(step)
            avg_ddva = avg_ddva / (step + 1)
            print('\n\nAvg_DDVA: ', avg_ddva)
    def save_img_results(self,
                         netsG,
                         noise,
                         atts,
                         image_atts,
                         inception_model,
                         classifiers,
                         real_imgs,
                         gen_iterations,
                         name='current'):
        mkdir_p(self.image_dir)

        # Save images
        """
        if self.args.kl_loss:
            fake_imgs, _, _ = netG(noise, atts) ##
        else:
            fake_imgs, _  = netG(noise, atts, image_att, inception_model, classifiers, imgs)
        """

        fake_imgs = []
        C_losses = None
        if not self.args.kl_loss:
            if cfg.TREE.BRANCH_NUM > 0:
                fake_img1, h_code1 = nn.parallel.data_parallel(
                    netsG[0], (noise, atts, image_atts), self.gpus)
                fake_imgs.append(fake_img1)
                if self.args.split == 'train':  ##for train: real_imgsの特徴量を使う。
                    att_embeddings, C_losses = classifier_loss(
                        classifiers, inception_model, real_imgs[0], image_atts,
                        C_losses)
                    _, C_losses = classifier_loss(classifiers, inception_model,
                                                  fake_img1, image_atts,
                                                  C_losses)
                else:  ##いらない
                    att_embeddings, _ = classifier_loss(
                        classifiers, inception_model, fake_img1, image_atts)

            if cfg.TREE.BRANCH_NUM > 1:
                fake_img2, h_code2 = nn.parallel.data_parallel(
                    netsG[1], (h_code1, att_embeddings), self.gpus)
                fake_imgs.append(fake_img2)
                if self.args.split == 'train':
                    att_embeddings, C_losses = classifier_loss(
                        classifiers, inception_model, real_imgs[1], image_atts,
                        C_losses)
                    _, C_losses = classifier_loss(classifiers, inception_model,
                                                  fake_img1, image_atts,
                                                  C_losses)
                else:
                    att_embeddings, _ = classifier_loss(
                        classifiers, inception_model, fake_img1, image_atts)

            if cfg.TREE.BRANCH_NUM > 2:
                fake_img3 = nn.parallel.data_parallel(
                    netsG[2], (h_code2, att_embeddings), self.gpus)
                fake_imgs.append(fake_img3)

        ##make image set
        img_set = build_images(fake_imgs)  ##
        img = Image.fromarray(img_set)
        full_path = '%s/G_%s.png' % (self.image_dir, gen_iterations)
        img.save(full_path)
예제 #26
0
    def __init__(self,
                 output_dir,
                 cap_model,
                 vocab,
                 eval_utils,
                 my_resnet,
                 word2idx,
                 glove_model,
                 idx2word,
                 vocab_cap=None,
                 eval_kwargs={}):  #

        if cfg.TRAIN.FLAG:
            self.model_dir = os.path.join(output_dir, 'Model')
            self.image_dir = os.path.join(output_dir, 'Image')
            self.test_img_dir = os.path.join(output_dir, 'test')
            self.log_dir = os.path.join(output_dir, 'Log')
            mkdir_p(self.model_dir)
            mkdir_p(self.image_dir)
            mkdir_p(self.log_dir)
            mkdir_p(self.test_img_dir)
            #self.summary_writer = FileWriter(self.log_dir)

        self.cap_model = None
        self.max_epoch = cfg.TRAIN.MAX_EPOCH
        self.snapshot_interval = 5
        self.gpus = None
        self.num_gpus = 1
        if cfg.GPU_ID is not None:
            s_gpus = cfg.GPU_ID.split(',')
            self.gpus = [int(ix) for ix in s_gpus]
            self.num_gpus = len(self.gpus)

        self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
        if cfg.CUDA:
            torch.cuda.set_device(self.gpus[0])
            cudnn.benchmark = True
        self.vocab = vocab
        self.eval_kwargs = eval_kwargs
        self.eval_utils = eval_utils
        self.my_resnet = my_resnet
        self.text_dim = cfg.TEXT.DIMENSION
        self.word2idx = word2idx
        self.idx2word = idx2word
        self.glove_model = glove_model
        print("Length of glove model", len(glove_model))

        if not cfg.CAP.USE:
            self.CTencoder = STEncoder(cfg.TEXT.HIDDENSTATE, 1,
                                       cfg.GAN.CONDITION_DIM, True, 128)
            ##layers, hidden size, bidirectional, emb_dim
            self.CTdecoder = STDuoDecoderAttn(256, cfg.TEXT.HIDDENSTATE,
                                              cfg.GAN.CONDITION_DIM,
                                              len(glove_model))
        else:
            self.CTencoder = STEncoder(cfg.TEXT.HIDDENSTATE, 1, 512, True, 128)
            ##hidden_size, embedding_dim, thought_size, vocab_size
            self.CTdecoder = STDuoDecoderAttn(256, cfg.TEXT.HIDDENSTATE, 512,
                                              len(glove_model))

        ##encoder_model, decoder_model, embedding_dim, vocab_size
        self.CTallmodel = UniSKIP_variant(self.CTencoder,
                                          self.CTdecoder, self.text_dim,
                                          len(self.glove_model), glove_model,
                                          word2idx, idx2word)

        self.cosEmbLoss = nn.CosineEmbeddingLoss()
        ## Loss Function
        self.CTloss = nn.CrossEntropyLoss()
        if (cfg.CUDA):
            self.cosEmbLoss = self.cosEmbLoss.cuda()
            self.CTloss = self.CTloss.cuda()
            self.CTencoder = self.CTencoder.cuda()
            self.CTdecoder = self.CTdecoder.cuda()
            self.CTallmodel = self.CTallmodel.cuda()
        self.new_arch = eval_kwargs['new_arch']
        self.cap_model_bool = eval_kwargs['cap_flag']
        self.vocab_cap = vocab_cap
예제 #27
0
    def gen_samples(self, idx):

        text_encoder = RNN_ENCODER(self.n_words,
                                   nhidden=cfg.TEXT.EMBEDDING_DIM)
        state_dict = torch.load(cfg.TRAIN.NET_E,
                                map_location=lambda storage, loc: storage)
        text_encoder.load_state_dict(state_dict)
        print('Load text encoder from: {}'.format(cfg.TRAIN.NET_E))
        text_encoder = text_encoder.cuda()
        text_encoder.eval()

        netG = G_NET()
        state_dict = torch.load(cfg.TRAIN.NET_G,
                                map_location=lambda storage, loc: storage)
        netG.load_state_dict(state_dict)
        print('Load G from: {}'.format(cfg.TRAIN.NET_G))
        netG.cuda()
        netG.eval()

        s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
        save_dir = '%s/samples' % (s_tmp)
        mkdir_p(save_dir)

        batch_size = self.batch_size
        nz = cfg.GAN.Z_DIM
        with torch.no_grad():
            noise = Variable(torch.FloatTensor(batch_size, nz))
        noise = noise.cuda()

        step = 0
        data_iter = iter(self.data_loader)

        while step < self.num_batches:
            data = data_iter.next()
            imgs, captions, cap_lens, class_ids, sorted_cap_indices = self.prepare_data(
                data)
            hidden = text_encoder.init_hidden(batch_size)
            words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
            mask = (captions == 0)
            num_words = words_embs.size(2)
            if mask.size(1) > num_words:
                mask = mask[:, :num_words]
            for i in range(10):
                noise.data.normal_(0, 1)
                fake_imgs, attention_maps, _, _ = netG(noise, sent_emb,
                                                       words_embs, mask)
                cap_lens_np = cap_lens.cpu().data.numpy()
                for j in range(batch_size):
                    right_idx = step * batch_size + sorted_cap_indices[j]
                    save_name = '%s/%d_s_%d' % (save_dir, i, right_idx)
                    original_idx = idx[right_idx]
                    shutil.copyfile(
                        '/.local/AttnGAN/data/FashionSynthesis/test/original/test128_{}.png'
                        .format(original_idx + 1),
                        save_dir + '/test128_{0}_{1}.png'.format(
                            original_idx + 1, right_idx))
                    for k in range(len(fake_imgs)):
                        im = fake_imgs[k][j].data.cpu().numpy()
                        im = (im + 1.0) * 127.5
                        im = im.astype(np.uint8)
                        im = np.transpose(im, (1, 2, 0))
                        im = Image.fromarray(im)
                        fullpath = '%s_g%d.png' % (save_name, k)
                        im.save(fullpath)
                    for k in range(len(attention_maps)):
                        if len(fake_imgs) > 1:
                            im = fake_imgs[k + 1].detach().cpu()
                        else:
                            im = fake_imgs[0].detach().cpu()
                        attn_maps = attention_maps[k]
                        att_sze = attn_maps.size(2)
                        img_set, sentences = \
                            build_super_images2(im[j].unsqueeze(0),
                                                captions[j].unsqueeze(0),
                                                [cap_lens_np[j]], self.ixtoword,
                                                [attn_maps[j]], att_sze)
                        if img_set is not None:
                            im = Image.fromarray(img_set)
                            fullpath = '%s_a%d.png' % (save_name, k)
                            im.save(fullpath)
            step += 1
예제 #28
0
    def sampling(self, split_dir, num_samples=30000):
        if cfg.TRAIN.NET_G == '':
            logger.error('Error: the path for morels is not found!')
        else:
            if split_dir == 'test':
                split_dir = 'valid'
            # Build and load the generator
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET()
            netG.apply(weights_init)
            netG.to(cfg.DEVICE)
            netG.eval()
            #
            text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            text_encoder = text_encoder.to(cfg.DEVICE)
            text_encoder.eval()
            logger.info('Loaded text encoder from: %s', cfg.TRAIN.NET_E)

            batch_size = self.batch_size[0]
            nz = cfg.GAN.GLOBAL_Z_DIM
            noise = Variable(torch.FloatTensor(batch_size, nz)).to(cfg.DEVICE)
            local_noise = Variable(torch.FloatTensor(batch_size, cfg.GAN.LOCAL_Z_DIM)).to(cfg.DEVICE)

            model_dir = cfg.TRAIN.NET_G
            state_dict = torch.load(model_dir, map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dict["netG"])
            max_objects = 10
            logger.info('Load G from: %s', model_dir)

            # the path to save generated images
            s_tmp = model_dir[:model_dir.rfind('.pth')].split("/")[-1]
            save_dir = '%s/%s/%s' % ("../output", s_tmp, split_dir)
            mkdir_p(save_dir)
            logger.info("Saving images to: {}".format(save_dir))

            number_batches = num_samples // batch_size
            if number_batches < 1:
                number_batches = 1

            data_iter = iter(self.data_loader)

            for step in tqdm(range(number_batches)):
                data = data_iter.next()

                imgs, captions, cap_lens, class_ids, keys, transformation_matrices, label_one_hot, _ = prepare_data(
                    data, eval=True)

                transf_matrices = transformation_matrices[0]
                transf_matrices_inv = transformation_matrices[1]

                hidden = text_encoder.init_hidden(batch_size)
                # words_embs: batch_size x nef x seq_len
                # sent_emb: batch_size x nef
                words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
                words_embs, sent_emb = words_embs.detach(), sent_emb.detach()
                mask = (captions == 0)
                num_words = words_embs.size(2)
                if mask.size(1) > num_words:
                    mask = mask[:, :num_words]

                #######################################################
                # (2) Generate fake images
                ######################################################
                noise.data.normal_(0, 1)
                local_noise.data.normal_(0, 1)
                inputs = (noise, local_noise, sent_emb, words_embs, mask, transf_matrices, transf_matrices_inv, label_one_hot, max_objects)
                inputs = tuple((inp.to(cfg.DEVICE) if isinstance(inp, torch.Tensor) else inp) for inp in inputs)

                with torch.no_grad():
                    fake_imgs, _, mu, logvar = netG(*inputs)
                for batch_idx, j in enumerate(range(batch_size)):
                    s_tmp = '%s/%s' % (save_dir, keys[j])
                    folder = s_tmp[:s_tmp.rfind('/')]
                    if not os.path.isdir(folder):
                        logger.info('Make a new folder: %s', folder)
                        mkdir_p(folder)
                    k = -1
                    # for k in range(len(fake_imgs)):
                    im = fake_imgs[k][j].data.cpu().numpy()
                    # [-1, 1] --> [0, 255]
                    im = (im + 1.0) * 127.5
                    im = im.astype(np.uint8)
                    im = np.transpose(im, (1, 2, 0))
                    im = Image.fromarray(im)
                    fullpath = '%s_s%d.png' % (s_tmp, step*batch_size+batch_idx)
                    im.save(fullpath)
예제 #29
0
    def sample(self, datapath, stage=1):
        if stage == 1:
            netG, _ = self.load_network_stageI()
        else:
            netG, _ = self.load_network_stageII()
        netG.eval()

        # Load text embeddings generated from the encoder
        #t_file = torchfile.load(datapath)
        t_file = torch.load(datapath)
        #captions_list = t_file.raw_txt
        captions_list = t_file['raw_txt']
        #embeddings = np.concatenate(t_file.fea_txt, axis=0)
        embeddings = np.concatenate(t_file['fea_txt'], axis=0)
        num_embeddings = len(captions_list)
        print('Successfully load sentences from: ', datapath)
        print('Total number of sentences:', num_embeddings)
        print('num_embeddings:', num_embeddings, embeddings.shape)
        # path to save generated samples
        #save_dir = cfg.NET_G[:cfg.NET_G.find('.pth')]
        save_dir = '../../../Exp_label_stage2_2'
        mkdir_p(save_dir)

        batch_size = np.minimum(num_embeddings, self.batch_size)
        nz = cfg.Z_DIM
        noise = Variable(torch.FloatTensor(batch_size, nz))
        if cfg.CUDA:
            noise = noise.cuda()
        count = 0
        while count < num_embeddings:
            #            if count > 3000:
            #                break
            iend = count + batch_size
            if iend > num_embeddings:
                iend = num_embeddings
                count = num_embeddings - batch_size
            embeddings_batch = embeddings[count:iend]
            captions_batch = captions_list[count:iend]
            txt_embedding = Variable(
                torch.FloatTensor(embeddings_batch)).float()
            if cfg.CUDA:
                txt_embedding = txt_embedding.cuda()

            #######################################################
            # (2) Generate fake images
            ######################################################
            noise.data.normal_(0, 1)
            inputs = (txt_embedding, noise)
            if stage == 1:
                _, fake_imgs, mu, logvar, _ = \
                    nn.parallel.data_parallel(netG, inputs, self.gpus)
            else:
                _, fake_imgs, mu, logvar = \
                           nn.parallel.data_parallel(netG, inputs, self.gpus)

            for i in range(batch_size):
                save_name = '%s/%d.png' % (save_dir, count + i)
                #save_name = '%s/%s.png' % (save_dir,str(count)+'_'+str(i))
                im = fake_imgs[i].data.cpu().numpy()
                im = (im + 1.0) * 127.5
                im = im.astype(np.uint8)
                # print('im', im.shape)
                im = np.transpose(im, (1, 2, 0))
                # print('im', im.shape)
                im = Image.fromarray(im)
                im.save(save_name)
            count += batch_size
예제 #30
0
    def sample(self, datapath, stage=1):
        if stage == 1:
            netG, _ = self.load_network_stageI()
        else:
            netG, _ = self.load_network_stageII()
        netG.eval()

        # Load text embeddings generated from the encoder
        t_file = torchfile.load(datapath)
        data_dir = osp.dirname(datapath)
        original_filenames = np.loadtxt('%s/val_filename.txt' % data_dir,
                                        dtype=str)
        original_captions = np.loadtxt('%s/val_captions.txt' % data_dir,
                                       dtype=str,
                                       delimiter='\n')
        captions_list = t_file.raw_txt
        # print(len(original_captions), len(captions_list))
        # for i in range(len(captions_list)):
        #     tmp1 = original_captions[i]
        #     tmp2 = captions_list[i]
        #     print(i)
        #     print(tmp1)
        #     print(tmp2)
        #     print('------')
        #     assert(tmp1 == tmp2)
        embeddings = np.concatenate(t_file.fea_txt, axis=0)
        num_embeddings = len(captions_list)
        print('Successfully load sentences from: ', datapath)
        print('Total number of sentences:', num_embeddings)
        print('num_embeddings:', num_embeddings, embeddings.shape)
        # path to save generated samples
        save_dir = cfg.NET_G[:cfg.NET_G.find('.pth')]
        mkdir_p(save_dir)

        batch_size = np.minimum(num_embeddings, self.batch_size)
        nz = cfg.Z_DIM
        noise = Variable(torch.FloatTensor(batch_size, nz))
        if cfg.CUDA:
            noise = noise.cuda()
        count = 0
        while count < num_embeddings:
            # if count > 3000:
            #     break
            iend = count + batch_size
            if iend > num_embeddings:
                iend = num_embeddings
                count = num_embeddings - batch_size
            embeddings_batch = embeddings[count:iend]
            filenames_batch = original_filenames[count:iend]
            # captions_batch = captions_list[count:iend]
            txt_embedding = Variable(torch.FloatTensor(embeddings_batch))
            if cfg.CUDA:
                txt_embedding = txt_embedding.cuda()

            #######################################################
            # (2) Generate fake images
            ######################################################
            noise.data.normal_(0, 1)
            inputs = (txt_embedding, noise)
            _, fake_imgs, mu, logvar = \
                nn.parallel.data_parallel(netG, inputs, self.gpus)
            for i in range(batch_size):
                # save_name = '%s/%05d.png' % (save_dir, count + i)
                save_name = osp.join(save_dir, filenames_batch[i] + '.jpg')

                im = fake_imgs[i].data.cpu().numpy()
                im = (im + 1.0) * 127.5
                im = im.astype(np.uint8)
                # print('im', im.shape)
                im = np.transpose(im, (1, 2, 0))
                # print('im', im.shape)
                im = Image.fromarray(im)
                im.save(save_name)
            count += batch_size
            print(count)
    def sampling(self):
        if self.args.netG == '':
            print('Error: the path for models is not found!')
        else:
            data_dir = cfg.DATA_DIR
            if self.args.split == "test_unseen":
                filepath = os.path.join(data_dir,
                                        "test_unseen/class_data.pickle")
            else:  #test_seen
                filepath = os.path.join(data_dir,
                                        "test_seen/class_data.pickle")
            if os.path.isfile(filepath):
                with open(filepath, "rb") as f:
                    data_dic = pkl.load(f)
            class_names = data_dic['classes']
            class_ids = data_dic['class_info']

            att_dir = os.path.join(data_dir, "CUB_200_2011/attributes")
            att_np = np.zeros((312, 200))  #for CUB
            with open(att_dir + "/class_attribute_labels_continuous.txt",
                      "r") as f:
                for ind, line in enumerate(f.readlines()):
                    line = line.strip("\n")
                    line = list(map(float, line.split()))
                    att_np[:, ind] = line

            if self.args.kl_loss:
                netG = G_NET()
            else:
                netG = G_NET_not_CA()
            test_model = "netG_epoch_600.pth"
            model_path = os.path.join(self.args.netG, "Model", test_model)  ##
            state_dic = torch.load(model_path,
                                   map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dic)
            netG.cuda()
            netG.eval()

            noise = torch.FloatTensor(self.batch_size, cfg.GAN.Z_DIM)

            for class_name, class_id in zip(class_names, class_ids):
                print("now generating, ", class_name)
                class_dir = os.path.join(self.args.netG, 'valid',
                                         test_model[:test_model.rfind(".")],
                                         self.args.split, class_name)
                atts = att_np[:, class_id - 1]
                atts = np.expand_dims(atts, axis=0)
                atts = atts.repeat(self.batch_size, axis=0)
                assert atts.shape == (self.batch_size, 312)

                if cfg.CUDA:
                    noise = noise.cuda()
                    atts = torch.cuda.FloatTensor(atts)
                else:
                    atts = torch.FloatTensor(atts)

                for i in range(self.sample_num):
                    noise.normal_(0, 1)
                    if self.args.kl_loss:
                        fake_imgs, _, _ = nn.parallel.data_parallel(
                            netG, (noise, atts), self.gpus)
                    else:
                        fake_imgs = nn.parallel.data_parallel(
                            netG, (noise, atts), self.gpus)
                    for stage in range(len(fake_imgs)):
                        for num, im in enumerate(fake_imgs[stage]):
                            im = im.detach().cpu()
                            im = im.add_(1).div_(2).mul_(255)
                            im = im.numpy().astype(np.uint8)
                            im = np.transpose(im, (1, 2, 0))
                            im = Image.fromarray(im)
                            stage_dir = os.path.join(class_dir,
                                                     "stage_%d" % stage)
                            mkdir_p(stage_dir)
                            img_path = os.path.join(stage_dir,
                                                    "single_%d.png" % num)
                            im.save(img_path)
                        for j in range(int(self.batch_size /
                                           20)):  ## cfg.batch_size==100
                            one_set = [
                                fake_imgs[0][j * 20:(j + 1) * 20],
                                fake_imgs[1][j * 20:(j + 1) * 20],
                                fake_imgs[2][j * 20:(j + 1) * 20]
                            ]
                            img_set = build_images(one_set)
                            img_set = Image.fromarray(img_set)
                            super_dir = os.path.join(class_dir, "super")
                            mkdir_p(super_dir)
                            img_path = os.path.join(super_dir,
                                                    "super_%d.png" % j)
                            img_set.save(img_path)
예제 #32
0
    def sampling(self, split_dir, num_samples=30000):
        if cfg.TRAIN.NET_G == '':
            print('Error: the path for morels is not found!')
        else:
            if split_dir == 'test':
                split_dir = 'valid'
            # Build and load the generator
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET()
            netG.apply(weights_init)
            netG.cuda()
            netG.eval()
            #
            text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = \
                torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()

            batch_size = self.batch_size
            nz = cfg.GAN.Z_DIM
            noise = Variable(torch.FloatTensor(batch_size, nz))
            noise = noise.cuda()

            model_dir = cfg.TRAIN.NET_G
            state_dict = \
                torch.load(model_dir, map_location=lambda storage, loc: storage)
            # state_dict = torch.load(cfg.TRAIN.NET_G)
            netG.load_state_dict(state_dict["netG"])
            print('Load G from: ', model_dir)

            # the path to save generated images
            s_tmp = model_dir[:model_dir.rfind('.pth')]
            save_dir = '%s/%s' % (s_tmp, split_dir)
            mkdir_p(save_dir)

            cnt = 0

            for _ in range(1):  # (cfg.TEXT.CAPTIONS_PER_IMAGE):
                for step, data in enumerate(self.data_loader, 0):
                    cnt += batch_size
                    if step % 10000 == 0:
                        print('step: ', step)
                    if step >= num_samples:
                        break

                    imgs, captions, cap_lens, class_ids, keys, transformation_matrices, label_one_hot = prepare_data(data)
                    transf_matrices_inv = transformation_matrices[1]

                    hidden = text_encoder.init_hidden(batch_size)
                    # words_embs: batch_size x nef x seq_len
                    # sent_emb: batch_size x nef
                    words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
                    words_embs, sent_emb = words_embs.detach(), sent_emb.detach()
                    mask = (captions == 0)
                    num_words = words_embs.size(2)
                    if mask.size(1) > num_words:
                        mask = mask[:, :num_words]

                    #######################################################
                    # (2) Generate fake images
                    ######################################################
                    noise.data.normal_(0, 1)
                    inputs = (noise, sent_emb, words_embs, mask, transf_matrices_inv, label_one_hot)
                    with torch.no_grad():
                        fake_imgs, _, mu, logvar = nn.parallel.data_parallel(netG, inputs, self.gpus)
                    for j in range(batch_size):
                        s_tmp = '%s/single/%s' % (save_dir, keys[j])
                        folder = s_tmp[:s_tmp.rfind('/')]
                        if not os.path.isdir(folder):
                            print('Make a new folder: ', folder)
                            mkdir_p(folder)
                        k = -1
                        # for k in range(len(fake_imgs)):
                        im = fake_imgs[k][j].data.cpu().numpy()
                        # [-1, 1] --> [0, 255]
                        im = (im + 1.0) * 127.5
                        im = im.astype(np.uint8)
                        im = np.transpose(im, (1, 2, 0))
                        im = Image.fromarray(im)
                        fullpath = '%s_s%d.png' % (s_tmp, k)
                        im.save(fullpath)
예제 #33
0
    def sample(self,
               datapath,
               num_samples=25,
               stage=1,
               draw_bbox=True,
               max_objects=3):
        from PIL import Image, ImageDraw, ImageFont
        import pickle
        import torchvision
        import torchvision.utils as vutils
        img_dir = cfg.IMG_DIR
        if stage == 1:
            netG, _ = self.load_network_stageI()
        else:
            netG, _ = self.load_network_stageII()
        netG.eval()

        # Load text embeddings generated from the encoder
        t_file = torchfile.load(datapath + "val_captions.t7")

        captions_list = t_file.raw_txt

        embeddings = np.concatenate(t_file.fea_txt, axis=0)
        num_embeddings = len(captions_list)
        label, bbox = load_validation_data(datapath)

        filepath = os.path.join(datapath, 'val_filename.txt')
        with open(filepath, 'r') as f:
            # filenames = pickle.load(f)
            filenames = f.readlines()

        print('Successfully load sentences from: ', datapath)
        print('Total number of sentences:', num_embeddings)
        # path to save generated samples
        save_dir = cfg.NET_G[:cfg.NET_G.find('.pth')] + "_visualize_bbox"
        print("saving to:", save_dir)
        mkdir_p(save_dir)

        if cfg.CUDA:
            if cfg.STAGE == 1:
                bbox = bbox.cuda()
            elif cfg.STAGE == 2:
                bbox = [bbox.clone().cuda(), bbox.cuda()]
            label = label.cuda()

        #######################################
        if cfg.STAGE == 1:
            bbox_ = bbox.clone()
        elif cfg.STAGE == 2:
            bbox_ = bbox[0].clone()

        if cfg.STAGE == 1:
            bbox = bbox.view(-1, 4)
            transf_matrices_inv = compute_transformation_matrix_inverse(bbox)
            transf_matrices_inv = transf_matrices_inv.view(
                num_embeddings, max_objects, 2, 3)
        elif cfg.STAGE == 2:
            _bbox = bbox.view(-1, 4)
            transf_matrices_inv = compute_transformation_matrix_inverse(_bbox)
            transf_matrices_inv = transf_matrices_inv.view(
                num_embeddings, max_objects, 2, 3)

            _bbox = bbox.view(-1, 4)
            transf_matrices_inv_s2 = compute_transformation_matrix_inverse(
                _bbox)
            transf_matrices_inv_s2 = transf_matrices_inv_s2.view(
                num_embeddings, max_objects, 2, 3)
            transf_matrices_s2 = compute_transformation_matrix(_bbox)
            transf_matrices_s2 = transf_matrices_s2.view(
                num_embeddings, max_objects, 2, 3)

        # produce one-hot encodings of the labels
        _labels = label.long()
        # remove -1 to enable one-hot converting
        _labels[_labels < 0] = 80
        # label_one_hot = torch.cuda.FloatTensor(num_embeddings, max_objects, 81).fill_(0)
        label_one_hot = torch.FloatTensor(num_embeddings, max_objects,
                                          81).fill_(0)
        label_one_hot = label_one_hot.scatter_(2, _labels, 1).float()
        #######################################

        nz = cfg.Z_DIM
        noise = Variable(torch.FloatTensor(9, nz))
        if cfg.CUDA:
            noise = noise.cuda()

        imsize = 64 if stage == 1 else 256

        for count in range(num_samples):
            index = int(np.random.randint(0, num_embeddings, 1))
            key = filenames[index].strip('\n')
            img_name = img_dir + "/" + key + ".jpg"
            # img = Image.open(img_name).convert('RGB').resize((imsize, imsize), Image.ANTIALIAS)
            # val_image = torchvision.transforms.functional.to_tensor(img)
            # val_image = val_image.view(1, 3, imsize, imsize)
            # val_image = (val_image - 0.5) * 2

            embeddings_batch = embeddings[index]
            transf_matrices_inv_batch = transf_matrices_inv[index]
            label_one_hot_batch = label_one_hot[index]

            embeddings_batch = np.reshape(embeddings_batch,
                                          (1, 1024)).repeat(9, 0)
            transf_matrices_inv_batch = transf_matrices_inv_batch.view(
                1, 3, 2, 3).repeat(9, 1, 1, 1)
            label_one_hot_batch = label_one_hot_batch.view(1, 3,
                                                           81).repeat(9, 1, 1)

            if cfg.STAGE == 2:
                transf_matrices_s2_batch = transf_matrices_s2[index]
                transf_matrices_s2_batch = transf_matrices_s2_batch.view(
                    1, 3, 2, 3).repeat(9, 1, 1, 1)
                transf_matrices_inv_s2_batch = transf_matrices_inv_s2[index]
                transf_matrices_inv_s2_batch = transf_matrices_inv_s2_batch.view(
                    1, 3, 2, 3).repeat(9, 1, 1, 1)

            txt_embedding = Variable(torch.FloatTensor(embeddings_batch))
            if cfg.CUDA:
                label_one_hot_batch = label_one_hot_batch.cuda()
                txt_embedding = txt_embedding.cuda()

            #######################################################
            # (2) Generate fake images
            ######################################################
            noise.data.normal_(0, 1)
            # inputs = (txt_embedding, noise, transf_matrices_inv_batch, label_one_hot_batch)
            if cfg.STAGE == 1:
                inputs = (txt_embedding, noise, transf_matrices_inv_batch,
                          label_one_hot_batch)
            elif cfg.STAGE == 2:
                inputs = (txt_embedding, noise, transf_matrices_inv_batch,
                          transf_matrices_s2_batch,
                          transf_matrices_inv_s2_batch, label_one_hot_batch)
            with torch.no_grad():
                # _, fake_imgs, mu, logvar, _ = nn.parallel.data_parallel(netG, inputs, self.gpus)
                _, fake_imgs, mu, logvar, _ = netG(*inputs)

            data_img = torch.FloatTensor(10, 3, imsize, imsize).fill_(0)
            # data_img[0] = val_image
            data_img[0:10] = fake_imgs

            if draw_bbox:
                for idx in range(3):
                    x, y, w, h = tuple(
                        [int(imsize * x) for x in bbox_[index, idx]])
                    w = imsize - 1 if w > imsize - 1 else w
                    h = imsize - 1 if h > imsize - 1 else h
                    if x <= -1:
                        break
                    data_img[:10, :, y, x:x + w] = 1
                    data_img[:10, :, y:y + h, x] = 1
                    data_img[:10, :, y + h, x:x + w] = 1
                    data_img[:10, :, y:y + h, x + w] = 1

            print('Caption: ', captions_list[index].decode('utf-8'))
            vutils.save_image(data_img,
                              '{}/{}.png'.format(
                                  save_dir,
                                  captions_list[index].decode('utf-8')),
                              normalize=True,
                              nrow=5)

        print("Saved {} files to {}".format(count + 1, save_dir))
예제 #34
0
    def gen_example(self, data_dic):
        if cfg.TRAIN.NET_G == '':
            print('Error: the path for morels is not found!')
        else:
            # Build and load the generator
            text_encoder = \
                RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
            state_dict = \
                torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()

            # the path to save generated images
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET()
            s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
            model_dir = cfg.TRAIN.NET_G
            state_dict = \
                torch.load(model_dir, map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dict)
            print('Load G from: ', model_dir)
            netG.cuda()
            netG.eval()
            for key in data_dic:
                save_dir = '%s/%s' % (s_tmp, key)
                mkdir_p(save_dir)
                captions, cap_lens, sorted_indices = data_dic[key]

                batch_size = captions.shape[0]
                nz = cfg.GAN.Z_DIM
                captions = Variable(torch.from_numpy(captions), volatile=True)
                cap_lens = Variable(torch.from_numpy(cap_lens), volatile=True)

                captions = captions.cuda()
                cap_lens = cap_lens.cuda()
                for i in range(1):  # 16
                    noise = Variable(torch.FloatTensor(batch_size, nz), volatile=True)
                    noise = noise.cuda()
                    #######################################################
                    # (1) Extract text embeddings
                    ######################################################
                    hidden = text_encoder.init_hidden(batch_size)
                    # words_embs: batch_size x nef x seq_len
                    # sent_emb: batch_size x nef
                    words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
                    mask = (captions == 0)
                    #######################################################
                    # (2) Generate fake images
                    ######################################################
                    noise.data.normal_(0, 1)
                    with torch.no_grad():
                        fake_imgs, attention_maps, _, _ = netG(noise, sent_emb, words_embs, mask)
                    # G attention
                    cap_lens_np = cap_lens.cpu().data.numpy()
                    for j in range(batch_size):
                        save_name = '%s/%d_s_%d' % (save_dir, i, sorted_indices[j])
                        for k in range(len(fake_imgs)):
                            im = fake_imgs[k][j].data.cpu().numpy()
                            im = (im + 1.0) * 127.5
                            im = im.astype(np.uint8)
                            # print('im', im.shape)
                            im = np.transpose(im, (1, 2, 0))
                            # print('im', im.shape)
                            im = Image.fromarray(im)
                            fullpath = '%s_g%d.png' % (save_name, k)
                            im.save(fullpath)

                        for k in range(len(attention_maps)):
                            if len(fake_imgs) > 1:
                                im = fake_imgs[k + 1].detach().cpu()
                            else:
                                im = fake_imgs[0].detach().cpu()
                            attn_maps = attention_maps[k]
                            att_sze = attn_maps.size(2)
                            img_set, sentences = \
                                build_super_images2(im[j].unsqueeze(0),
                                                    captions[j].unsqueeze(0),
                                                    [cap_lens_np[j]], self.ixtoword,
                                                    [attn_maps[j]], att_sze)
                            if img_set is not None:
                                im = Image.fromarray(img_set)
                                fullpath = '%s_a%d.png' % (save_name, k)
                                im.save(fullpath)
예제 #35
0
    def gen_example(self, data_dic):
        if cfg.TRAIN.NET_G == '':
            print('Error: the path for morels is not found!')
        else:
            # Build and load the generator
            text_encoder = \
                BertEncoder(cfg.TEXT.EMBEDDING_DIM)
            state_dict = \
                torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            text_encoder.load_state_dict(state_dict)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()

            # the path to save generated images
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET()
                netG_mix = G_NET_MIX()
            s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
            model_dir = cfg.TRAIN.NET_G
            state_dict = \
                torch.load(model_dir, map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dict)
            netG_mix.load_state_dict(state_dict)
            print('Load G from: ', model_dir)
            netG.cuda()
            netG_mix.cuda()
            netG.eval()
            netG_mix.eval()
            for key in data_dic:
                save_dir = '%s/%s' % (s_tmp, key)
                print(save_dir)
                mkdir_p(save_dir)
                captions, cap_lens, sorted_indices = data_dic[key]

                batch_size = captions.shape[0]
                nz = cfg.GAN.Z_DIM
                captions = Variable(torch.from_numpy(captions))
                cap_lens = Variable(torch.from_numpy(cap_lens))

                captions = captions.cuda()
                cap_lens = cap_lens.cuda()
                for i in range(1):  # 16
                    noise = Variable(torch.FloatTensor(2, batch_size, nz))
                    noise = noise.cuda()
                    #######################################################
                    # (1) Extract text embeddings
                    ######################################################
                    # hidden = text_encoder.init_hidden(batch_size)
                    # words_embs: batch_size x nef x seq_len
                    # sent_emb: batch_size x nef
                    words_embs, sent_emb = text_encoder(captions)
                    mask = (captions == 0)
                    #######################################################
                    # (2) Generate fake images
                    ######################################################
                    noise.data.normal_(0, 1)
                    noise1 = noise[0]
                    noise2 = noise[1]
                    fake_imgs_mix1, attention_maps, _, _ = netG_mix(
                        noise, sent_emb, words_embs, mask)
                    noise = torch.cat(
                        [noise.chunk(2, 0)[1],
                         noise.chunk(2, 0)[0]], dim=0)
                    fake_imgs_mix2, attention_maps, _, _ = netG_mix(
                        noise, sent_emb, words_embs, mask)
                    fake_imgs1, attention_maps, _, _ = netG(
                        noise1, sent_emb, words_embs, mask)
                    fake_imgs2, attention_maps, _, _ = netG(
                        noise2, sent_emb, words_embs, mask)
                    # G attention
                    cap_lens_np = cap_lens.cpu().data.numpy()
                    for j in range(batch_size):
                        save_name = '%s/%d_s_%d' % (save_dir, i,
                                                    sorted_indices[j])
                        for k in range(len(fake_imgs_mix1)):
                            im = fake_imgs_mix1[k][j].data.cpu().numpy()
                            im = (im + 1.0) * 127.5
                            im = im.astype(np.uint8)
                            # print('im', im.shape)
                            im = np.transpose(im, (1, 2, 0))
                            # print('im', im.shape)
                            im = Image.fromarray(im)
                            fullpath = '%s_g%d_AB.png' % (save_name, k)
                            im.save(fullpath)
                        for k in range(len(fake_imgs_mix2)):
                            im = fake_imgs_mix2[k][j].data.cpu().numpy()
                            im = (im + 1.0) * 127.5
                            im = im.astype(np.uint8)
                            # print('im', im.shape)
                            im = np.transpose(im, (1, 2, 0))
                            # print('im', im.shape)
                            im = Image.fromarray(im)
                            fullpath = '%s_g%d_BA.png' % (save_name, k)
                            im.save(fullpath)
                        for k in range(len(fake_imgs1)):
                            im = fake_imgs1[k][j].data.cpu().numpy()
                            im = (im + 1.0) * 127.5
                            im = im.astype(np.uint8)
                            # print('im', im.shape)
                            im = np.transpose(im, (1, 2, 0))
                            # print('im', im.shape)
                            im = Image.fromarray(im)
                            fullpath = '%s_g%d_A.png' % (save_name, k)
                            im.save(fullpath)
                        for k in range(len(fake_imgs2)):
                            im = fake_imgs2[k][j].data.cpu().numpy()
                            im = (im + 1.0) * 127.5
                            im = im.astype(np.uint8)
                            # print('im', im.shape)
                            im = np.transpose(im, (1, 2, 0))
                            # print('im', im.shape)
                            im = Image.fromarray(im)
                            fullpath = '%s_g%d_B.png' % (save_name, k)
                            im.save(fullpath)