def gen_example(self, data_dic):
    """Generate and save images and attention maps for example captions.

    Loads the text encoder from ``cfg.TRAIN.NET_E`` and the generator from
    ``cfg.TRAIN.NET_G``, moves both to CUDA, and writes PNG files under
    ``<cfg.TRAIN.NET_G without '.pth'>/<key>/`` for every key of *data_dic*.

    Args:
        data_dic: Mapping from a key (used as the output sub-directory name)
            to a ``(captions, cap_lens, sorted_indices)`` triple.
            ``captions`` and ``cap_lens`` are NumPy arrays (they are passed
            to ``torch.from_numpy``); ``sorted_indices[j]`` is used in the
            file name of sample ``j``.

    Returns:
        None.  If ``cfg.TRAIN.NET_G`` is empty an error is printed and
        nothing is generated.
    """
    if cfg.TRAIN.NET_G == '':
        print('Error: the path for morels is not found!')
        return

    # Build and load the text encoder.
    text_encoder = RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder = text_encoder.cuda()
    text_encoder.eval()

    # Build and load the generator.
    netG = G_DCGAN() if cfg.GAN.B_DCGAN else G_NET()
    # Output root: the checkpoint path with its '.pth' suffix stripped.
    s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)
    netG.cuda()
    netG.eval()

    nz = cfg.GAN.Z_DIM
    for key in data_dic:
        save_dir = '%s/%s' % (s_tmp, key)
        mkdir_p(save_dir)
        captions, cap_lens, sorted_indices = data_dic[key]

        batch_size = captions.shape[0]
        captions = torch.from_numpy(captions).cuda()
        cap_lens = torch.from_numpy(cap_lens).cuda()
        cap_lens_np = cap_lens.cpu().numpy()

        for i in range(1):  # 16
            # Inference only: the old ``volatile=True`` flag has no effect on
            # PyTorch >= 0.4, so the whole forward pass (text encoder AND
            # generator) runs under no_grad to avoid building autograd graphs.
            with torch.no_grad():
                noise = torch.FloatTensor(batch_size, nz).cuda()
                #######################################################
                # (1) Extract text embeddings
                ######################################################
                hidden = text_encoder.init_hidden(batch_size)
                # words_embs: batch_size x nef x seq_len
                # sent_emb: batch_size x nef
                words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
                # Index 0 is treated as padding in the caption matrix.
                mask = (captions == 0)
                #######################################################
                # (2) Generate fake images
                ######################################################
                noise.normal_(0, 1)
                fake_imgs, attention_maps, _, _ = netG(noise, sent_emb,
                                                       words_embs, mask)

            # G attention
            for j in range(batch_size):
                save_name = '%s/%d_s_%d' % (save_dir, i, sorted_indices[j])
                for k in range(len(fake_imgs)):
                    im = fake_imgs[k][j].data.cpu().numpy()
                    # Rescale to 0..255; assumes generator output lies in
                    # [-1, 1] -- TODO confirm.
                    im = (im + 1.0) * 127.5
                    im = im.astype(np.uint8)
                    # CHW -> HWC for PIL.
                    im = np.transpose(im, (1, 2, 0))
                    im = Image.fromarray(im)
                    fullpath = '%s_g%d.png' % (save_name, k)
                    im.save(fullpath)

                for k in range(len(attention_maps)):
                    # Attention map k is drawn over generator output k + 1
                    # when more than one output exists.
                    if len(fake_imgs) > 1:
                        im = fake_imgs[k + 1].detach().cpu()
                    else:
                        im = fake_imgs[0].detach().cpu()
                    attn_maps = attention_maps[k]
                    att_sze = attn_maps.size(2)
                    img_set, sentences = \
                        build_super_images2(im[j].unsqueeze(0),
                                            captions[j].unsqueeze(0),
                                            [cap_lens_np[j]], self.ixtoword,
                                            [attn_maps[j]], att_sze)
                    if img_set is not None:
                        im = Image.fromarray(img_set)
                        fullpath = '%s_a%d.png' % (save_name, k)
                        im.save(fullpath)
Example #2
0
def gen_example(n_words, wordtoix, ixtoword, model_dir):
    '''Generate images (and attention maps) for the sentences in caption.txt.

    Each non-empty line of ``caption.txt`` is tokenized, mapped to word
    indices through *wordtoix* (unknown words are dropped), and the captions
    are sorted by decreasing length before being fed to the models.  Results
    are written as PNG files into ``results/``; everything runs on the CPU.

    Args:
        n_words: Vocabulary size passed to ``RNN_ENCODER``.
        wordtoix: Mapping from word to integer index.
        ixtoword: Mapping from index to word, forwarded to
            ``build_super_images2``.
        model_dir: Path of the generator checkpoint to load.

    Raises:
        ValueError: If ``caption.txt`` contains no usable sentence.
    '''
    filepath = 'caption.txt'
    # Only the read needs the file handle; release it before the heavy work.
    with open(filepath, "r") as f:
        sentences = f.read().split('\n')

    tokenizer = RegexpTokenizer(r'\w+')
    captions = []
    cap_lens = []
    for sent in sentences:
        if len(sent) == 0:
            continue
        sent = sent.replace("\ufffd\ufffd", " ")
        tokens = tokenizer.tokenize(sent.lower())
        if len(tokens) == 0:
            print('sentence token == 0 !')
            continue

        rev = []
        for t in tokens:
            t = t.encode('ascii', 'ignore').decode('ascii')
            if len(t) > 0 and t in wordtoix:
                rev.append(wordtoix[t])
        captions.append(rev)
        cap_lens.append(len(rev))

    if not captions:
        # np.max on an empty list would raise an opaque reduction error.
        raise ValueError('no usable captions found in %s' % filepath)

    # Sort captions by decreasing length and zero-pad them into one matrix.
    max_len = np.max(cap_lens)
    sorted_indices = np.argsort(cap_lens)[::-1]
    cap_lens = np.asarray(cap_lens)[sorted_indices]
    cap_array = np.zeros((len(captions), max_len), dtype='int64')
    for row, idx in enumerate(sorted_indices):
        cap = captions[idx]
        cap_array[row, :len(cap)] = cap

    text_encoder = RNN_ENCODER(n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    print('Load text encoder from:', cfg.TRAIN.NET_E)
    text_encoder.eval()

    netG = G_NET()
    netG.apply(weights_init)
    netG.eval()
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    print('Load G from: ', model_dir)

    save_dir = 'results/'
    mkdir_p(save_dir)

    batch_size = cap_array.shape[0]
    nz = cfg.GAN.Z_DIM
    captions_t = torch.from_numpy(cap_array)
    cap_lens_t = torch.from_numpy(cap_lens)
    cap_lens_np = cap_lens

    # NOTE(review): ``image_per_caption`` is not defined in this function; it
    # is expected to be a module-level name -- confirm.
    for i in range(image_per_caption):  # 16
        # Inference only: keep autograd off for both forward passes.
        with torch.no_grad():
            noise = torch.FloatTensor(batch_size, nz)
            # (1) Extract text embeddings
            hidden = text_encoder.init_hidden(batch_size)
            words_embs, sent_emb = text_encoder(captions_t, cap_lens_t, hidden)
            # Index 0 is the padding value written into cap_array.
            mask = (captions_t == 0)
            # (2) Generate fake images
            noise.normal_(0, 1)
            fake_imgs, attention_maps, _, _ = netG(noise, sent_emb, words_embs,
                                                   mask, cap_lens_t)

        for j in range(batch_size):
            save_name = '%s/%d_%d' % (save_dir, i, sorted_indices[j])
            for k in range(len(fake_imgs)):
                im = fake_imgs[k][j].data.cpu().numpy()
                # Rescale to 0..255; assumes generator output lies in
                # [-1, 1] -- TODO confirm.
                im = (im + 1.0) * 127.5
                im = im.astype(np.uint8)
                # CHW -> HWC for PIL.
                im = np.transpose(im, (1, 2, 0))
                im = Image.fromarray(im)
                fullpath = '%s_g%d.png' % (save_name, k)
                im.save(fullpath)

            # The attention maps are saved per caption: this loop must live
            # inside the ``j`` loop, otherwise only the last caption of the
            # batch gets its maps written.
            for k in range(len(attention_maps)):
                if len(fake_imgs) > 1:
                    im = fake_imgs[k + 1].detach().cpu()
                else:
                    im = fake_imgs[0].detach().cpu()
                attn_maps = attention_maps[k]
                att_sze = attn_maps.size(2)
                img_set, sentences = \
                    build_super_images2(im[j].unsqueeze(0),
                                        captions_t[j].unsqueeze(0),
                                        [cap_lens_np[j]], ixtoword,
                                        [attn_maps[j]], att_sze)
                if img_set is not None:
                    im = Image.fromarray(img_set)
                    fullpath = '%s_a%d_attention.png' % (save_name, k)
                    im.save(fullpath)
Example #3
0
def generate(caption, wordtoix, ixtoword, text_encoder, netG, blob_service, copies=2):
    """Generate images for *caption* and upload them to blob storage.

    Args:
        caption: Caption text, vectorized through ``vectorize_caption``.
        wordtoix: Mapping from word to integer index.
        ixtoword: Mapping from index to word, forwarded to
            ``build_super_images2``.
        text_encoder: Loaded text encoder model.
        netG: Loaded generator model.
        blob_service: Storage client exposing ``create_blob_from_stream``
            (and ``create_blob_from_path`` for the disabled export path).
        copies: Forwarded to ``vectorize_caption``.  When it equals 2 only
            the first sample of the batch is uploaded, together with its
            attention maps; when it is greater than 2 every sample is
            uploaded under its own ``<prefix>/<j>/`` folder.

    Returns:
        List of URLs of the uploaded blobs, in upload order.
    """
    # load word vector
    captions, cap_lens = vectorize_caption(wordtoix, caption, copies)
    batch_size = captions.shape[0]
    nz = cfg.GAN.Z_DIM

    captions = torch.from_numpy(captions)
    cap_lens = torch.from_numpy(cap_lens)
    noise = torch.FloatTensor(batch_size, nz)
    if cfg.CUDA:
        captions = captions.cuda()
        cap_lens = cap_lens.cuda()
        noise = noise.cuda()

    # Inference only: run both forward passes without autograd bookkeeping.
    with torch.no_grad():
        #######################################################
        # (1) Extract text embeddings
        #######################################################
        hidden = text_encoder.init_hidden(batch_size)
        words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
        mask = (captions == 0)

        #######################################################
        # (2) Generate fake images
        #######################################################
        noise.normal_(0, 1)
        fake_imgs, attention_maps, _, _ = netG(noise, sent_emb, words_embs, mask)

    # ONNX EXPORT (disabled; flip the condition manually to export models)
    #export = os.environ["EXPORT_MODEL"].lower() == 'true'
    if False:
        print("saving text_encoder.onnx")
        text_encoder_out = torch.onnx._export(text_encoder, (captions, cap_lens, hidden), "text_encoder.onnx", export_params=True)
        print("uploading text_encoder.onnx")
        blob_service.create_blob_from_path('models', "text_encoder.onnx", os.path.abspath("text_encoder.onnx"))
        print("done")

        print("saving netg.onnx")
        netg_out = torch.onnx._export(netG, (noise, sent_emb, words_embs, mask), "netg.onnx", export_params=True)
        print("uploading netg.onnx")
        blob_service.create_blob_from_path('models', "netg.onnx", os.path.abspath("netg.onnx"))
        print("done")
        return

    # G attention
    cap_lens_np = cap_lens.cpu().numpy()

    # storing to blob storage
    container_name = "images"
    full_path = "https://attgan.blob.core.windows.net/images/%s"
    prefix = datetime.now().strftime('%Y/%B/%d/%H_%M_%S_%f')
    urls = []

    def upload_png(image, blob_name):
        # Serialize to an in-memory PNG, upload it, and record its URL.
        stream = io.BytesIO()
        image.save(stream, format="png")
        stream.seek(0)
        blob_service.create_blob_from_stream(container_name, blob_name, stream)
        urls.append(full_path % blob_name)

    for j in range(batch_size):
        for k in range(len(fake_imgs)):
            im = fake_imgs[k][j].data.cpu().numpy()
            # Rescale to 0..255; assumes generator output lies in [-1, 1]
            # -- TODO confirm.
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            # CHW -> HWC for PIL.
            im = np.transpose(im, (1, 2, 0))
            im = Image.fromarray(im)

            if copies > 2:
                blob_name = '%s/%d/%s_g%d.png' % (prefix, j, "bird", k)
            else:
                blob_name = '%s/%s_g%d.png' % (prefix, "bird", k)
            upload_png(im, blob_name)

        if copies == 2:
            # Attention maps are uploaded once per sample, after all of its
            # images -- not once per generator stage, which would re-upload
            # the same blobs and duplicate their URLs.
            for k in range(len(attention_maps)):
                if len(fake_imgs) > 1:
                    im = fake_imgs[k + 1].detach().cpu()
                else:
                    im = fake_imgs[0].detach().cpu()

                attn_maps = attention_maps[k]
                att_sze = attn_maps.size(2)

                img_set, sentences = \
                    build_super_images2(im[j].unsqueeze(0),
                                        captions[j].unsqueeze(0),
                                        [cap_lens_np[j]], ixtoword,
                                        [attn_maps[j]], att_sze)

                if img_set is not None:
                    blob_name = '%s/%s_a%d.png' % (prefix, "attmaps", k)
                    upload_png(Image.fromarray(img_set), blob_name)
            # only look at first one
            break

    return urls
Example #4
0
    def save_img_results(self,
                         netG,
                         noise,
                         imgs,
                         bbox_maps_fwd,
                         bbox_maps_bwd,
                         bbox_fmaps,
                         hmaps,
                         rois,
                         num_rois,
                         gen_iterations,
                         name='current'):
        """Render generated and reference heat maps and save them as PNGs.

        Runs *netG* on the given inputs, then writes up to four images into
        ``self.image_dir``: ``G_``/``D_`` (built with ``build_super_images``
        from the generated / reference heat maps) and ``G2_``/``D2_`` (same,
        built with ``build_super_images2``).  Each file name ends with
        ``<name>_<gen_iterations>.png``.  An image is skipped when its
        builder returns ``None``.
        """
        font_max = 20
        font_size = 12

        imgs = imgs.cpu()
        generated = netG(noise, bbox_maps_fwd, bbox_maps_bwd, bbox_fmaps)
        fake_hmaps = generated.squeeze().detach().cpu()
        hmaps = hmaps.squeeze().cpu()

        # Per-sample "captions": one label index per ROI slot, zero elsewhere.
        n_samples = fake_hmaps.size(0)
        captions = Variable(torch.zeros(n_samples, cfg.ROI.BOXES_NUM)).cuda()
        for b in range(self.batch_size):
            for r in range(num_rois[b]):
                # Column 4 of a ROI row is read as its category id.
                category = int(rois[b, r, 4])
                captions[b, r] = self.cats_dict[category][0]

        att_sze = fake_hmaps.size(2)

        def render(builder, heatmaps, path_format):
            # Build one composite with ``builder`` and save it if produced.
            img_set, _ = builder(imgs,
                                 captions,
                                 self.ixtoword,
                                 heatmaps,
                                 att_sze,
                                 lr_imgs=None,
                                 font_max=font_max,
                                 font_size=font_size,
                                 max_word_num=cfg.ROI.BOXES_NUM)
            if img_set is None:
                return
            fullpath = path_format % (self.image_dir, name, gen_iterations)
            Image.fromarray(img_set).save(fullpath)

        render(build_super_images, fake_hmaps, '%s/G_%s_%d.png')
        render(build_super_images, hmaps, '%s/D_%s_%d.png')
        render(build_super_images2, fake_hmaps, '%s/G2_%s_%d.png')
        render(build_super_images2, hmaps, '%s/D2_%s_%d.png')
Example #5
0
    def gen_samples(self, idx):
        """Generate 10 samples per test caption and save them next to the
        matching reference image.

        Loads the text encoder (``cfg.TRAIN.NET_E``) and generator
        (``cfg.TRAIN.NET_G``) onto CUDA, iterates ``self.num_batches``
        batches of ``self.data_loader`` and writes, into
        ``<cfg.TRAIN.NET_G without '.pth'>/samples``, the generated images,
        their attention maps and a copy of the original test image.

        Args:
            idx: Indexable object; ``idx[position] + 1`` is used as the
                number in the original image's file name.
        """
        text_encoder = RNN_ENCODER(self.n_words,
                                   nhidden=cfg.TEXT.EMBEDDING_DIM)
        state_dict = torch.load(cfg.TRAIN.NET_E,
                                map_location=lambda storage, loc: storage)
        text_encoder.load_state_dict(state_dict)
        print('Load text encoder from: {}'.format(cfg.TRAIN.NET_E))
        text_encoder = text_encoder.cuda()
        text_encoder.eval()

        netG = G_NET()
        state_dict = torch.load(cfg.TRAIN.NET_G,
                                map_location=lambda storage, loc: storage)
        netG.load_state_dict(state_dict)
        print('Load G from: {}'.format(cfg.TRAIN.NET_G))
        netG.cuda()
        netG.eval()

        s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
        save_dir = '%s/samples' % (s_tmp)
        mkdir_p(save_dir)

        batch_size = self.batch_size
        nz = cfg.GAN.Z_DIM
        noise = torch.FloatTensor(batch_size, nz).cuda()

        step = 0
        data_iter = iter(self.data_loader)

        while step < self.num_batches:
            # Built-in next() works on every iterator; the ``.next()``
            # method is a Python 2 leftover that current DataLoader
            # iterators do not provide.
            data = next(data_iter)
            imgs, captions, cap_lens, class_ids, sorted_cap_indices = \
                self.prepare_data(data)
            cap_lens_np = cap_lens.cpu().data.numpy()

            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                hidden = text_encoder.init_hidden(batch_size)
                words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
            mask = (captions == 0)
            # Crop the padding mask to the number of word embeddings
            # actually returned by the encoder.
            num_words = words_embs.size(2)
            if mask.size(1) > num_words:
                mask = mask[:, :num_words]

            # Dataset position of every sample in this batch.
            right_indices = [step * batch_size + sorted_cap_indices[j]
                             for j in range(batch_size)]

            # Copy each reference image once per batch (it does not depend
            # on the noise draw below).
            for right_idx in right_indices:
                original_idx = idx[right_idx]
                shutil.copyfile(
                    '/.local/AttnGAN/data/FashionSynthesis/test/original/test128_{}.png'
                    .format(original_idx + 1),
                    save_dir + '/test128_{0}_{1}.png'.format(
                        original_idx + 1, right_idx))

            for i in range(10):
                with torch.no_grad():
                    noise.normal_(0, 1)
                    fake_imgs, attention_maps, _, _ = netG(noise, sent_emb,
                                                           words_embs, mask)
                for j in range(batch_size):
                    save_name = '%s/%d_s_%d' % (save_dir, i, right_indices[j])
                    for k in range(len(fake_imgs)):
                        im = fake_imgs[k][j].data.cpu().numpy()
                        # Rescale to 0..255; assumes generator output lies
                        # in [-1, 1] -- TODO confirm.
                        im = (im + 1.0) * 127.5
                        im = im.astype(np.uint8)
                        # CHW -> HWC for PIL.
                        im = np.transpose(im, (1, 2, 0))
                        im = Image.fromarray(im)
                        fullpath = '%s_g%d.png' % (save_name, k)
                        im.save(fullpath)
                    for k in range(len(attention_maps)):
                        if len(fake_imgs) > 1:
                            im = fake_imgs[k + 1].detach().cpu()
                        else:
                            im = fake_imgs[0].detach().cpu()
                        attn_maps = attention_maps[k]
                        att_sze = attn_maps.size(2)
                        img_set, sentences = \
                            build_super_images2(im[j].unsqueeze(0),
                                                captions[j].unsqueeze(0),
                                                [cap_lens_np[j]], self.ixtoword,
                                                [attn_maps[j]], att_sze)
                        if img_set is not None:
                            im = Image.fromarray(img_set)
                            fullpath = '%s_a%d.png' % (save_name, k)
                            im.save(fullpath)
            step += 1
Example #6
0
    def generate(self, caption, copies=2):
        """Generate images for *caption* and persist them via ``self.saveable``.

        The caption is vectorized with ``self.vectorize_caption``, embedded
        by ``self.text_encoder`` and fed to ``self.netG``.  Every generated
        image is handed to ``self.saveable.save``; when ``copies == 2`` only
        the first sample is saved, together with its attention maps, and
        when ``copies > 2`` each sample is saved under ``<prefix>/<j>``.

        Returns:
            List of whatever ``self.saveable.save`` returned, in save order.
        """
        def to_uint8_hwc(tensor):
            # Rescale by (x + 1) * 127.5, cast to uint8 and move channels last.
            arr = tensor.data.cpu().numpy()
            arr = (arr + 1.0) * 127.5
            arr = arr.astype(np.uint8)
            return np.transpose(arr, (1, 2, 0))

        # load word vector
        captions, cap_lens, _n_words = self.vectorize_caption(caption, copies)
        batch_size = captions.shape[0]
        noise_dim = cfg.GAN.Z_DIM

        captions = Variable(torch.from_numpy(captions), volatile=True)
        cap_lens = Variable(torch.from_numpy(cap_lens), volatile=True)
        noise = Variable(torch.FloatTensor(batch_size, noise_dim), volatile=True)
        if self.cuda:
            captions, cap_lens, noise = \
                captions.cuda(), cap_lens.cuda(), noise.cuda()

        # (1) Extract text embeddings
        hidden = self.text_encoder.init_hidden(batch_size)
        words_embs, sent_emb = self.text_encoder(captions, cap_lens, hidden)
        mask = (captions == 0)

        # (2) Generate fake images
        noise.data.normal_(0, 1)
        fake_imgs, attention_maps, _, _ = self.netG(noise, sent_emb, words_embs, mask)

        cap_lens_np = cap_lens.cpu().data.numpy()

        # Timestamp-based prefix used to partition the saved images.
        prefix = datetime.now().strftime('%Y/%B/%d/%H_%M_%S_%f')
        urls = []
        for j in range(batch_size):
            for k, stage_imgs in enumerate(fake_imgs):
                pixels = to_uint8_hwc(stage_imgs[j])
                birdy = 'bird_g{}'.format(k)
                folder = '{}/{}'.format(prefix, j) if copies > 2 else prefix
                urls.append(self.saveable.save(folder, birdy, pixels))

            if copies != 2:
                continue

            # copies == 2: add the attention maps of this (first) sample,
            # then stop -- the remaining samples are not saved.
            for k, attn_maps in enumerate(attention_maps):
                source = fake_imgs[k + 1] if len(fake_imgs) > 1 else fake_imgs[0]
                im = source.detach().cpu()
                att_sze = attn_maps.size(2)
                img_set, sentences = build_super_images2(
                    im[j].unsqueeze(0),
                    captions[j].unsqueeze(0),
                    [cap_lens_np[j]], self.ixtoword,
                    [attn_maps[j]], att_sze)
                if img_set is not None:
                    attnmap = 'attmaps_a{}'.format(k)
                    urls.append(self.saveable.save(prefix, attnmap, img_set))
            break

        return urls
Example #7
0
    def gen_example(self, data_dic):
        """Modify real images according to example captions and save results.

        Loads the text encoder, image encoder, VGG network, main generator
        (``cfg.TRAIN.NET_G``) and DCM (``cfg.TRAIN.NET_C``) onto CUDA, then
        for every key of *data_dic* writes generated images, attention maps,
        the DCM output (``*_SF.png``) and the real image (``*_SR.png``) under
        ``<cfg.TRAIN.NET_G without '.pth'>/<key>/``.

        Args:
            data_dic: Mapping from a key to a
                ``(captions, cap_lens, sorted_indices, imgs)`` tuple;
                ``captions`` and ``cap_lens`` are NumPy arrays and ``imgs``
                is indexed by branch number.

        Returns:
            None.  Prints an error and does nothing when either checkpoint
            path is empty.
        """
        if cfg.TRAIN.NET_G == '' or cfg.TRAIN.NET_C == '':
            print('Error: the path for main module or DCM is not found!')
            return

        def load_weights(path):
            # Deserialize a checkpoint, keeping its tensors on the CPU.
            return torch.load(path, map_location=lambda storage, loc: storage)

        def to_pil(tensor):
            # Rescale by (x + 1) * 127.5, cast to uint8, CHW -> HWC, wrap in PIL.
            arr = tensor.data.cpu().numpy()
            arr = (arr + 1.0) * 127.5
            arr = arr.astype(np.uint8)
            return Image.fromarray(np.transpose(arr, (1, 2, 0)))

        # The text encoder
        text_encoder = RNN_ENCODER(self.n_words,
                                   nhidden=cfg.TEXT.EMBEDDING_DIM)
        text_encoder.load_state_dict(load_weights(cfg.TRAIN.NET_E))
        print('Load text encoder from:', cfg.TRAIN.NET_E)
        text_encoder = text_encoder.cuda()
        text_encoder.eval()

        # The image encoder (checkpoint path derived from the text encoder's)
        image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)
        img_encoder_path = cfg.TRAIN.NET_E.replace('text_encoder',
                                                   'image_encoder')
        image_encoder.load_state_dict(load_weights(img_encoder_path))
        print('Load image encoder from:', img_encoder_path)
        image_encoder = image_encoder.cuda()
        image_encoder.eval()

        # The VGG network
        VGG = VGGNet()
        print("Load the VGG model")
        VGG.cuda()
        VGG.eval()

        # The main module
        netG = G_DCGAN() if cfg.GAN.B_DCGAN else G_NET()
        s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
        model_dir = cfg.TRAIN.NET_G
        netG.load_state_dict(load_weights(model_dir))
        print('Load G from: ', model_dir)
        netG.cuda()
        netG.eval()

        # The DCM
        netDCM = DCM_Net()
        if cfg.TRAIN.NET_C != '':
            netDCM.load_state_dict(load_weights(cfg.TRAIN.NET_C))
            print('Load DCM from: ', cfg.TRAIN.NET_C)
        netDCM.cuda()
        netDCM.eval()

        for key in data_dic:
            save_dir = '%s/%s' % (s_tmp, key)
            mkdir_p(save_dir)
            captions, cap_lens, sorted_indices, imgs = data_dic[key]

            batch_size = captions.shape[0]
            nz = cfg.GAN.Z_DIM
            captions = Variable(torch.from_numpy(captions),
                                volatile=True).cuda()
            cap_lens = Variable(torch.from_numpy(cap_lens),
                                volatile=True).cuda()

            for i in range(1):
                noise = Variable(torch.FloatTensor(batch_size, nz),
                                 volatile=True).cuda()

                # (1) Extract text and image embeddings
                hidden = text_encoder.init_hidden(batch_size)
                words_embs, sent_emb = text_encoder(captions, cap_lens,
                                                    hidden)
                region_features, cnn_code = image_encoder(
                    imgs[cfg.TREE.BRANCH_NUM - 1].unsqueeze(0))
                mask = (captions == 0)

                # (2) Modify real images
                noise.data.normal_(0, 1)
                fake_imgs, attention_maps, mu, logvar, h_code, c_code = \
                    netG(noise, sent_emb, words_embs, mask, cnn_code,
                         region_features)

                real_img = imgs[cfg.TREE.BRANCH_NUM - 1].unsqueeze(0)
                real_features = VGG(real_img)[0]

                fake_img = netDCM(h_code, real_features, sent_emb,
                                  words_embs, mask, c_code)

                cap_lens_np = cap_lens.cpu().data.numpy()
                for j in range(batch_size):
                    save_name = '%s/%d_s_%d' % (save_dir, i,
                                                sorted_indices[j])

                    # Outputs of the main module.
                    for k in range(len(fake_imgs)):
                        fullpath = '%s_g%d.png' % (save_name, k)
                        to_pil(fake_imgs[k][j]).save(fullpath)

                    # Attention maps over the main module's outputs.
                    for k in range(len(attention_maps)):
                        if len(fake_imgs) > 1:
                            im = fake_imgs[k + 1].detach().cpu()
                        else:
                            im = fake_imgs[0].detach().cpu()
                        attn_maps = attention_maps[k]
                        att_sze = attn_maps.size(2)
                        img_set, sentences = build_super_images2(
                            im[j].unsqueeze(0),
                            captions[j].unsqueeze(0),
                            [cap_lens_np[j]], self.ixtoword,
                            [attn_maps[j]], att_sze)
                        if img_set is not None:
                            fullpath = '%s_a%d.png' % (save_name, k)
                            Image.fromarray(img_set).save(fullpath)

                    # Output of the DCM.
                    save_name = '%s/%d_sf_%d' % (save_dir, 1,
                                                 sorted_indices[j])
                    fullpath = '%s_SF.png' % (save_name)
                    to_pil(fake_img[j]).save(fullpath)

                # The real input image (branch index 2, fixed file name).
                save_name = '%s/%d_s_%d' % (save_dir, 1, 9)
                fullpath = '%s_SR.png' % (save_name)
                to_pil(imgs[2]).save(fullpath)
Example #8
0
    def gen_example(self, data_dic):
        """Generate images and attention maps for captions and save them as PNGs.

        Loads the text encoder checkpoint ``cfg.TRAIN.NET_E`` and the generator
        checkpoint ``cfg.TRAIN.NET_G``, then processes the captions of every
        entry in ``data_dic`` in fixed batches of 16 on the GPU.

        Files are written below ``<NET_G path without '.pth'>/<key>/``:
        ``s_<idx>_g<k>.png`` and ``stage_<k>/<idx>_g<k>.png`` for each generator
        output ``k``, and ``s_<idx>_a<k>.png`` for each attention map ``k``,
        where ``<idx>`` is ``sorted_indices[n]`` for caption ``n``.

        Args:
            data_dic: mapping ``key -> (captions, cap_lens, sorted_indices)``.
                ``captions`` and ``cap_lens`` are sliced and handed to
                ``torch.from_numpy``, so they must be numpy arrays.

        Returns:
            None. Prints an error message and does nothing else when
            ``cfg.TRAIN.NET_G`` is empty.
        """
        if cfg.TRAIN.NET_G == '':
            print('Error: the path for morels is not found!')
        else:
            # Build and load the generator
            batch_size = 16
            text_encoder = \
                RNN_ENCODER(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
            print("=======self.n_words: %d" % self.n_words)
            state_dict = \
                torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage)
            # Partial restore of the text encoder: keep only checkpoint entries
            # that exist in the model with a matching shape and load them.
            # (Assigning into the dict returned by state_dict() only rebinds
            # dict entries and never touches the module's parameters.)
            own_state = text_encoder.state_dict()
            compatible = {
                name: param for name, param in state_dict.items()
                if name in own_state and own_state[name].shape == param.shape
            }
            text_encoder.load_state_dict(compatible, strict=False)
            print('Load text encoder from:', cfg.TRAIN.NET_E)
            text_encoder = text_encoder.cuda()
            text_encoder.eval()

            # the path to save generated images
            if cfg.GAN.B_DCGAN:
                netG = G_DCGAN()
            else:
                netG = G_NET(text_encoder)
            s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')]
            model_dir = cfg.TRAIN.NET_G
            state_dict = \
                torch.load(model_dir, map_location=lambda storage, loc: storage)
            netG.load_state_dict(state_dict)
            print('Load G from: ', model_dir)
            netG.cuda()
            netG.eval()
            for key in data_dic:
                save_dir = '%s/%s' % (s_tmp, key)
                mkdir_p(save_dir)
                captions, cap_lens, sorted_indices = data_dic[key]

                # NOTE(review): integer division means a trailing partial batch
                # (and any input with fewer than `batch_size` captions) is
                # skipped without a message - confirm this is intended.
                total_time = len(captions) // batch_size
                nz = cfg.GAN.Z_DIM
                with torch.no_grad():
                    for i in range(total_time):
                        start = i * batch_size
                        stop = start + batch_size
                        noise = Variable(torch.FloatTensor(batch_size, nz))
                        noise = noise.cuda()
                        caption_tmp = Variable(torch.from_numpy(captions[start:stop]))
                        if i < 3:
                            # Debug aid: show the token ids of the first batches.
                            print(caption_tmp.data)
                        cap_len_tmp = Variable(torch.from_numpy(cap_lens[start:stop]))
                        caption_tmp = caption_tmp.cuda()
                        cap_len_tmp = cap_len_tmp.cuda()
                        #######################################################
                        # (1) Extract text embeddings
                        ######################################################
                        # NOTE(review): `hidden` is never used (the encoder is
                        # called with None below); the call is kept in case
                        # init_hidden has side effects - confirm and remove.
                        hidden = text_encoder.init_hidden(batch_size)
                        # words_embs: batch_size x nef x seq_len
                        # sent_emb: batch_size x nef
                        words_embs, sent_emb, _ = text_encoder(caption_tmp, cap_len_tmp, None)
                        words_embs, sent_emb = words_embs.detach(), sent_emb.detach()
                        # True where the caption holds token id 0.
                        mask = (caption_tmp == 0)
                        #######################################################
                        # (2) Generate fake images
                        ######################################################
                        # Reseed from system entropy/time. Passing a datetime
                        # object to random.seed() is rejected by Python 3.11+.
                        random.seed()
                        rnd = random.randint(0, 1000)
                        torch.cuda.manual_seed(rnd)
                        noise.data.normal_(0, 1)
                        fake_imgs, attention_maps, _, _, _ = netG(noise, sent_emb, words_embs, mask, caption_tmp, cap_len_tmp)
                        for j in range(batch_size):
                            img_idx = sorted_indices[start + j]
                            save_name = '%s/s_%d' % (save_dir, img_idx)
                            for k in range(len(fake_imgs)):
                                # Rescale (x + 1) / 2 * 255 and reorder to
                                # height x width x channel for PIL.
                                im = fake_imgs[k][j].data.cpu().numpy()
                                im = ((im + 1.0) / 2) * 255.0
                                im = im.astype(np.uint8)
                                im = np.transpose(im, (1, 2, 0))
                                im = Image.fromarray(im)
                                fullpath = '%s_g%d.png' % (save_name, k)
                                im.save(fullpath)
                                # save a second copy to a per-stage directory
                                save_dir2 = '%s/stage_%d' % (save_dir, k)
                                mkdir_p(save_dir2)
                                fullpath = '%s/%d_g%d.png' % (save_dir2, img_idx, k)
                                im.save(fullpath)

                            # G attention
                            for k in range(len(attention_maps)):
                                # Attention map k is rendered over generator
                                # output k + 1 when several outputs exist.
                                if len(fake_imgs) > 1:
                                    base_imgs = fake_imgs[k + 1].detach().cpu()
                                else:
                                    base_imgs = fake_imgs[0].detach().cpu()
                                attn_maps = attention_maps[k]
                                att_sze = attn_maps.size(2)
                                img_set, sentences = \
                                    build_super_images2(base_imgs[j].unsqueeze(0),
                                                        caption_tmp[j].unsqueeze(0),
                                                        [cap_len_tmp[j]], self.ixtoword,
                                                        [attn_maps[j]], att_sze)
                                if img_set is not None:
                                    attn_img = Image.fromarray(img_set)
                                    fullpath = '%s_a%d.png' % (save_name, k)
                                    attn_img.save(fullpath)
Example #9
0
def generate_image_sent(sent, model_values):
    """Generate images for a single sentence and save them as PNG files.

    The sentence is tokenized with ``tokenize_sent``, encoded by the text
    encoder and fed to the generator with a batch size of 1. Every generator
    output ``k`` is written to ``output/my_img_g<k>.png`` and every attention
    visualisation ``k`` to ``output/my_img_a<k>.png``. The ``output`` folder
    is not created here and must already exist.

    Args:
        sent: the sentence to render.
        model_values: 4-tuple ``(algo, text_encoder, netG, dataset)``; only
            ``algo.ixtoword`` and ``dataset.wordtoix`` are read from the
            first and last element.

    Returns:
        None.
    """
    algo, text_encoder, netG, dataset = model_values
    my_caption = tokenize_sent(sent, dataset.wordtoix)
    my_cap_len = [len(my_caption[0])]

    batch_size = 1
    nz = cfg.GAN.Z_DIM

    # Inference only: torch.no_grad() replaces the removed `volatile=True`
    # flag, which current PyTorch ignores (with a warning), so that no
    # autograd graph is built during the forward passes.
    with torch.no_grad():
        # converting things into their proper forms
        my_caption = Variable(torch.from_numpy(np.array(my_caption)))
        my_cap_len = Variable(torch.from_numpy(np.array(my_cap_len)))

        # The caption is cast to LongTensor before it reaches the encoder.
        my_caption = my_caption.type(torch.LongTensor)

        if cfg.CUDA:
            my_caption = my_caption.cuda()
            my_cap_len = my_cap_len.cuda()

        # generating noise, mask and text embeddings
        noise = Variable(torch.FloatTensor(batch_size, nz))
        if cfg.CUDA:
            noise = noise.cuda()

        #######################################################
        # (1) Extract text embeddings
        ######################################################
        hidden = text_encoder.init_hidden(batch_size)
        # words_embs: batch_size x nef x seq_len
        # sent_emb: batch_size x nef
        words_embs, sent_emb = text_encoder(my_caption, my_cap_len, hidden)
        # True where the caption holds token id 0.
        mask = (my_caption == 0)

        #######################################################
        # (2) Generate fake images
        ######################################################
        noise.data.normal_(0, 1)
        my_fake_imgs, my_attention_maps, _, _ = netG(noise, sent_emb, words_embs, mask)

    # needed to turn the token ids back into words in the attention images
    my_cap_lens_np = my_cap_len.cpu().data.numpy()

    # saving images (batch_size is always 1, so this loop runs once)
    for j in range(batch_size):
        # Path prefix of every output file; the `output` folder has to be
        # created manually next to the code.
        save_name = 'output/my_img'

        for k in range(len(my_fake_imgs)):
            # Rescale (x + 1) * 127.5 and reorder to height x width x channel.
            im = my_fake_imgs[k][j].data.cpu().numpy()
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            im = Image.fromarray(im)
            fullpath = '%s_g%d.png' % (save_name, k)
            im.save(fullpath)

        for k in range(len(my_attention_maps)):
            # Attention map k is rendered over generator output k + 1 when
            # several outputs exist.
            if len(my_fake_imgs) > 1:
                base_imgs = my_fake_imgs[k + 1].detach().cpu()
            else:
                base_imgs = my_fake_imgs[0].detach().cpu()
            attn_maps = my_attention_maps[k]
            att_sze = attn_maps.size(2)
            img_set, sentences = \
                build_super_images2(base_imgs[j].unsqueeze(0),
                                    my_caption[j].unsqueeze(0),
                                    [my_cap_lens_np[j]], algo.ixtoword,
                                    [attn_maps[j]], att_sze)
            if img_set is not None:
                attn_img = Image.fromarray(img_set)
                fullpath = '%s_a%d.png' % (save_name, k)
                attn_img.save(fullpath)
Example #10
0
def generate(caption, wordtoix, ixtoword, text_encoder, netG, blob_service, copies=2):
    """Generate images for a caption, upload them as blobs and return their URLs.

    The caption is vectorized with ``vectorize_caption``, encoded and passed
    to the generator. Every produced image is uploaded as a PNG to the
    ``images`` container and the matching URL is appended to the result.

    Behaviour depending on ``copies``:
        * ``copies == 2``: only the first sample of the batch is processed,
          and attention-map images are uploaded as well.
        * ``copies > 2``: every sample is processed and its images are stored
          under a per-sample sub-path ``<prefix>/<j>/``.
        * otherwise: every sample is processed and all samples share the same
          blob names (later uploads overwrite earlier ones).

    Args:
        caption: text to render.
        wordtoix: word -> index vocabulary handed to ``vectorize_caption``.
        ixtoword: index -> word vocabulary used for the attention images.
        text_encoder: text encoder model.
        netG: generator model.
        blob_service: only referenced by the disabled ONNX export branch.
        copies: number of copies forwarded to ``vectorize_caption``.

    Returns:
        List of URL strings, in upload order.
    """
    # load word vector
    captions, cap_lens = vectorize_caption(wordtoix, caption, copies)

    batch_size = captions.shape[0]
    nz = cfg.GAN.Z_DIM

    # Inference only: torch.no_grad() replaces the removed `volatile=True`
    # flag, which current PyTorch ignores (with a warning).
    with torch.no_grad():
        captions = Variable(torch.from_numpy(captions))
        cap_lens = Variable(torch.from_numpy(cap_lens))
        noise = Variable(torch.FloatTensor(batch_size, nz))

        if cfg.CUDA:
            captions = captions.cuda()
            cap_lens = cap_lens.cuda()
            noise = noise.cuda()

        #######################################################
        # (1) Extract text embeddings
        #######################################################
        hidden = text_encoder.init_hidden(batch_size)
        words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
        # True where the caption holds token id 0.
        mask = (captions == 0)

        #######################################################
        # (2) Generate fake images
        #######################################################
        noise.data.normal_(0, 1)
        fake_imgs, attention_maps, _, _ = netG(noise, sent_emb, words_embs, mask)

    # ONNX EXPORT (manually disabled; flip the condition to export the models)
    #export = os.environ["EXPORT_MODEL"].lower() == 'true'
    if False:
        print("saving text_encoder.onnx")
        text_encoder_out = torch.onnx._export(text_encoder, (captions, cap_lens, hidden), "text_encoder.onnx", export_params=True)
        print("uploading text_encoder.onnx")
        blob_service.create_blob_from_path('models', "text_encoder.onnx", os.path.abspath("text_encoder.onnx"))
        print("done")

        print("saving netg.onnx")
        netg_out = torch.onnx._export(netG, (noise, sent_emb, words_embs, mask), "netg.onnx", export_params=True)
        print("uploading netg.onnx")
        blob_service.create_blob_from_path('models', "netg.onnx", os.path.abspath("netg.onnx"))
        print("done")
        return

    # G attention
    cap_lens_np = cap_lens.cpu().data.numpy()

    # storing to blob storage
    container_name = "images"
    full_path = "https://attgan123.blob.core.windows.net/images/%s"
    prefix = datetime.now().strftime('%Y/%B/%d/%H_%M_%S_%f')

    # Storage credentials come from configuration only. The attention-map
    # upload used to embed a literal account key in the source; it now uses
    # the same connection string as the image upload.
    my_connection_string = MY_CONNECTION_STRING
    my_credential = MY_CREDENTIAL

    def upload_png(image, blob_name):
        """Upload a PIL image as PNG under blob_name; return its full_path URL."""
        stream = io.BytesIO()
        image.save(stream, format="png")
        stream.seek(0)
        blob = BlobClient.from_connection_string(my_connection_string, container_name, blob_name, credential=my_credential)
        blob.upload_blob(stream, overwrite=True)
        return full_path % blob_name

    urls = []
    for j in range(batch_size):
        for k in range(len(fake_imgs)):
            # Rescale (x + 1) * 127.5 and reorder to height x width x channel.
            im = fake_imgs[k][j].data.cpu().numpy()
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            im = Image.fromarray(im)

            if copies > 2:
                blob_name = '%s/%d/%s_g%d.png' % (prefix, j, "bird", k)
            else:
                blob_name = '%s/%s_g%d.png' % (prefix, "bird", k)
            urls.append(upload_png(im, blob_name))

            # NOTE(review): this block sits inside the per-stage loop, so the
            # attention maps are uploaded (and their URLs appended) once per
            # generator output. Kept as-is because callers may rely on the
            # resulting URL order - confirm whether it should be de-nested.
            if copies == 2:
                for a in range(len(attention_maps)):
                    # Attention map a is rendered over generator output a + 1
                    # when several outputs exist.
                    if len(fake_imgs) > 1:
                        base_imgs = fake_imgs[a + 1].detach().cpu()
                    else:
                        base_imgs = fake_imgs[0].detach().cpu()

                    attn_maps = attention_maps[a]
                    att_sze = attn_maps.size(2)

                    img_set, sentences = \
                        build_super_images2(base_imgs[j].unsqueeze(0),
                                            captions[j].unsqueeze(0),
                                            [cap_lens_np[j]], ixtoword,
                                            [attn_maps[j]], att_sze)

                    if img_set is not None:
                        attn_img = Image.fromarray(img_set)
                        blob_name = '%s/%s_a%d.png' % (prefix, "attmaps", a)
                        urls.append(upload_png(attn_img, blob_name))
        # only look at the first sample when two copies were requested
        if copies == 2:
            break

    return urls
Example #11
0
def gen_img(sentences):
    """Generate images from example sentences.

    Each non-empty sentence is tokenized on ``\\w+``, lower-cased, stripped of
    non-ASCII characters and mapped to ids through the module-level
    ``wordtoix``; unknown words are dropped. The captions are sorted by
    decreasing length, zero-padded into one batch and run through the
    module-level ``text_encoder`` and ``netG``.

    Args:
        sentences: iterable of sentence strings. Empty strings and sentences
            that yield no tokens are skipped.

    Returns:
        List of PIL images. For every caption (in length-sorted order) it
        holds one image per generator output followed by one image per
        attention map for which ``build_super_images2`` returned data.
        Empty when no sentence could be tokenized.
    """
    output = []

    non_empty = [sent for sent in sentences if len(sent) > 0]
    if not non_empty:
        # Nothing to tokenize; np.max() below would fail on an empty list.
        return output

    from nltk.tokenize import RegexpTokenizer
    tokenizer = RegexpTokenizer(r'\w+')  # built once, reused for every sentence

    captions = []
    cap_lens = []
    for sent in non_empty:
        sent = sent.replace("\ufffd\ufffd", " ")
        tokens = tokenizer.tokenize(sent.lower())
        if len(tokens) == 0:
            print('sent', sent)
            continue

        rev = []
        for t in tokens:
            t = t.encode('ascii', 'ignore').decode('ascii')
            if len(t) > 0 and t in wordtoix:
                rev.append(wordtoix[t])
        captions.append(rev)
        cap_lens.append(len(rev))

    if not captions:
        return output

    # Sort captions by decreasing length and zero-pad them into one array.
    max_len = np.max(cap_lens)
    sorted_indices = np.argsort(cap_lens)[::-1]
    cap_lens = np.asarray(cap_lens)[sorted_indices]
    cap_array = np.zeros((len(captions), max_len), dtype='int64')
    for row, idx in enumerate(sorted_indices):
        cap = captions[idx]
        cap_array[row, :len(cap)] = cap

    batch_size = cap_array.shape[0]
    nz = 100  # noise dimension

    # Run the whole inference without autograd bookkeeping (previously only
    # the tensor construction was wrapped, which had no effect on the
    # forward passes).
    with torch.no_grad():
        caption_ids = Variable(torch.from_numpy(cap_array))
        caption_lens = Variable(torch.from_numpy(cap_lens))
        noise = Variable(torch.FloatTensor(batch_size, nz))

        hidden = text_encoder.init_hidden(batch_size)
        words_embs, sent_emb = text_encoder(caption_ids, caption_lens, hidden)
        # True where the padded caption holds token id 0.
        mask = (caption_ids == 0)
        noise.data.normal_(0, 1)
        fake_imgs, attention_maps, _, _ = netG(noise, sent_emb, words_embs,
                                               mask)

    cap_lens_np = caption_lens.cpu().data.numpy()
    for j in range(batch_size):
        for k in range(len(fake_imgs)):
            # Rescale (x + 1) * 127.5 and reorder to height x width x channel.
            im = fake_imgs[k][j].data.cpu().numpy()
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            output.append(Image.fromarray(im))

        for k in range(len(attention_maps)):
            # Attention map k is rendered over generator output k + 1 when
            # several outputs exist.
            if len(fake_imgs) > 1:
                base_imgs = fake_imgs[k + 1].detach().cpu()
            else:
                base_imgs = fake_imgs[0].detach().cpu()
            attn_maps = attention_maps[k]
            att_sze = attn_maps.size(2)
            # The second return value is discarded (it used to overwrite the
            # `sentences` parameter).
            img_set, _ = \
                build_super_images2(base_imgs[j].unsqueeze(0),
                                    caption_ids[j].unsqueeze(0),
                                    [cap_lens_np[j]], dataset.ixtoword,
                                    [attn_maps[j]], att_sze)
            if img_set is not None:
                output.append(Image.fromarray(img_set))
    return output
Example #12
0
def generate(caption, wordtoix, ixtoword, text_encoder, netG, copies=2):
    """Generate images for a caption and return them keyed by file name.

    The caption is vectorized with ``vectorize_caption``, encoded and passed
    to the generator. Every generator output is converted to an ``uint8``
    height x width x channel array and run through ``rdn.predict`` twice.

    Args:
        caption: text to render.
        wordtoix: word -> index vocabulary handed to ``vectorize_caption``.
        ixtoword: index -> word vocabulary used for the attention images.
        text_encoder: text encoder model.
        netG: generator model.
        copies: number of copies forwarded to ``vectorize_caption``. With
            ``copies > 2`` generated images are keyed ``<j>/coco_g<k>.png``
            per sample ``j``; otherwise ``coco_g<k>.png``.

    Returns:
        Dict mapping blob names to images: the ``rdn.predict`` results for
        generated images and the ``build_super_images2`` arrays under
        ``attmaps_a<k>.png``.
    """
    # load word vector
    captions, cap_lens = vectorize_caption(wordtoix, caption, copies)

    batch_size = captions.shape[0]
    nz = cfg.GAN.Z_DIM

    # Run the whole inference without autograd bookkeeping (previously only
    # the tensor construction was wrapped, which had no effect on the
    # forward passes).
    with torch.no_grad():
        captions = Variable(torch.from_numpy(captions))
        cap_lens = Variable(torch.from_numpy(cap_lens))
        noise = Variable(torch.FloatTensor(batch_size, nz))

        if cfg.CUDA:
            captions = captions.cuda()
            cap_lens = cap_lens.cuda()
            noise = noise.cuda()

        #######################################################
        # (1) Extract text embeddings
        #######################################################
        hidden = text_encoder.init_hidden(batch_size)
        words_embs, sent_emb = text_encoder(captions, cap_lens, hidden)
        # True where the caption holds token id 0.
        mask = (captions == 0)

        #######################################################
        # (2) Generate fake images
        #######################################################
        noise.data.normal_(0, 1)
        fake_imgs, attention_maps, _, _ = netG(noise, sent_emb, words_embs, mask)

    # G attention
    cap_lens_np = cap_lens.cpu().data.numpy()

    names2images = {}

    for j in range(batch_size):
        for k in range(len(fake_imgs)):
            # Rescale (x + 1) * 127.5 and reorder to height x width x channel.
            im = fake_imgs[k][j].data.cpu().numpy()
            im = (im + 1.0) * 127.5
            im = im.astype(np.uint8)
            im = np.transpose(im, (1, 2, 0))
            # `rdn.predict` is applied twice in a row.
            # NOTE(review): presumably two upscaling passes - confirm.
            im = rdn.predict(im)
            im = rdn.predict(im)

            if copies > 2:
                blob_name = osp.join(str(j), f'coco_g{k}.png')
            else:
                blob_name = f'coco_g{k}.png'
            names2images[blob_name] = im

        for k in range(len(attention_maps)):
            # Attention map k is rendered over generator output k + 1 when
            # several outputs exist.
            if len(fake_imgs) > 1:
                base_imgs = fake_imgs[k + 1].detach().cpu()
            else:
                base_imgs = fake_imgs[0].detach().cpu()

            attn_maps = attention_maps[k]
            att_sze = attn_maps.size(2)

            img_set, sentences = \
                build_super_images2(base_imgs[j].unsqueeze(0),
                                    captions[j].unsqueeze(0),
                                    [cap_lens_np[j]], ixtoword,
                                    [attn_maps[j]], att_sze)

            if img_set is not None:
                # NOTE(review): the key does not include j, so attention maps
                # of later samples overwrite those of earlier ones - confirm
                # this is intended.
                blob_name = f'attmaps_a{k}.png'
                names2images[blob_name] = img_set

    return names2images