Example #1
    def __getitem__(self, index):

        if self.mode.lower() == 'train':
            data_path, label_path = self.train_data[index], self.train_labels[index]
        elif self.mode.lower() == 'valid':
            data_path, label_path = self.valid_data[index], self.valid_labels[index]
        elif self.mode.lower() == 'test':
            data_path, label_path = self.test_data[index], self.test_labels[index]
        else:
            raise RuntimeError(
                'Unexpected dataset mode. Supported modes are: train, valid and test')

        image, label = utils.pil_loader(data_path, label_path)

        if self.data_transform is not None:
            image = self.data_transform(image)

        if self.label_transform is not None:
            label = self.label_transform(label)

        # perform one-hot-encoding
        target = utils.one_hot_encode(label)
        target = torch.FloatTensor(target)

        return image, label, target
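All of the snippets on this page call a pil_loader helper whose exact definition varies by project (some take a path, some raw bytes, and Example #1's takes a data path and a label path). A minimal sketch of the common single-path variant, following the torchvision convention (an assumption, not code from any snippet here):

from PIL import Image

def pil_loader(path):
    # Open the file explicitly so the handle is closed even if decoding fails,
    # and convert to RGB so downstream transforms see a consistent mode.
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')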
Example #2
def extract_feature(model, image_path):
    try:
        img = pil_loader(image_path)
        img = img.resize((256, 256))
        img_data = image.img_to_array(img)
    except Exception:
        # Fall back to a black image if the file is missing or unreadable
        img_data = np.zeros((256, 256, 3))
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    feature = model.predict(img_data)
    return feature
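A usage sketch for extract_feature, assuming image is tensorflow.keras.preprocessing.image and preprocess_input comes from the chosen backbone (hypothetical wiring; the snippet does not show its imports, and the path is a placeholder):

import numpy as np
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing import image

model = VGG16(weights='imagenet', include_top=False, pooling='avg')
feature = extract_feature(model, 'photos/example.jpg')
print(feature.shape)  # (1, 512) for VGG16 with global average pooling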
Example #3
    def _read(self, idx=None):
        if idx is None:
            idx = np.random.randint(self.num)
        fn = os.path.join(self.prefix, self.img_lst[idx])
        lb = self.lb_lst[idx]
        try:
            with open(fn, 'rb') as f:
                img = pil_loader(f.read())
            return img, lb
        except Exception as err:
            print('Read image[{}, {}] failed ({})'.format(idx, fn, err))
            # Retry with a fresh random index so one bad file does not stop iteration
            return self._read()
Example #4
    def _read(self, idx=None):
        if idx is None:
            idx = np.random.randint(self.num)
        idx %= self.num
        fn = self.img_lst[idx]
        lb = self.lb_lst[idx]
        try:
            with open(fn, 'rb') as f:
                img = pil_loader(f.read())
            return img, lb, fn
        except Exception as err:
            print('Read image[{}, {}] failed ({})'.format(idx, fn, err))
            return self._read()
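Both _read variants above resample a random index on failure but recurse without a bound. A self-contained sketch of the same pattern with a capped retry count (a hypothetical helper, assuming a pil_loader that accepts raw bytes as these snippets do):

import numpy as np

def read_with_retry(paths, labels, idx=None, max_tries=10):
    # Resample a random index when a file is unreadable, but bound the
    # number of attempts instead of recursing indefinitely.
    for _ in range(max_tries):
        if idx is None:
            idx = np.random.randint(len(paths))
        try:
            with open(paths[idx], 'rb') as f:
                return pil_loader(f.read()), labels[idx]
        except Exception as err:
            print('Read image[{}, {}] failed ({})'.format(idx, paths[idx], err))
            idx = None
    raise RuntimeError('Too many unreadable images')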
Example #5
def read_image(src, line):
    # Each line holds three relative image paths followed by an integer target
    fields = line.strip().split()
    paths = [os.path.join(src, fields[i]) for i in range(3)]
    target = int(fields[3])

    images = []
    for path in paths:
        try:
            images.append(utils.pil_loader(path))
        except Exception:
            # Replace an unreadable image with a random solid colour
            # and mark the triplet as invalid
            target = 0
            colour = tuple(random.randint(0, 255) for _ in range(3))
            images.append(Image.new('RGB', (opt.imgH, opt.imgH), colour))

    img1, img2, img3 = images
    return (img1, img2, img3), target
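A usage sketch (the index-file format is inferred from the parsing above; the paths and target value are made-up placeholders):

line = 'pairs/a.jpg pairs/b.jpg pairs/c.jpg 1'
(img1, img2, img3), target = read_image('/data/images', line)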
Example #6
    def __getitem__(self, index: int) -> dict:
        """
        :param index: The index of the item to retrieve
        :return: A dict holding the loaded image, its id, and its file name.
        """
        imgid = self.imgids[index]
        image_path = self.imagepaths[imgid]
        image = utils.pil_loader(image_path)
        return {
            'image': image,
            'id': imgid,
            'image_file': os.path.basename(image_path),
        }
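Because this __getitem__ returns a dict holding a PIL image and strings, the default DataLoader collation would fail on it; a minimal sketch of a pass-through collate_fn (assumed usage, dataset is a placeholder):

from torch.utils.data import DataLoader

def collate_keep(batch):
    # Keep the per-item dicts in a plain list instead of tensor-stacking them
    return batch

loader = DataLoader(dataset, batch_size=8, collate_fn=collate_keep)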
Example #7
    def _load_image(self, fn):
        if self.memcached:
            try:
                img_value = mc.pyvector()
                self.mclient.Get(fn, img_value)
                img_value_str = mc.ConvertBuffer(img_value)
                img = utils.pil_loader(img_value_str)
            except Exception:
                print('Read image failed ({})'.format(fn))
                raise
            else:
                return img
        else:
            return Image.open(fn).convert('RGB')
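The memcached branch assumes an mc binding and a client initialized elsewhere; a sketch of the setup usually seen with this binding (the config paths and key are placeholders, and the GetInstance call is an assumption about the binding's API):

import mc

# Placeholders; real deployments point these at the memcached cluster configs
server_list_cfg = '/path/to/server_list.conf'
client_cfg = '/path/to/client.conf'

# GetInstance is an assumption about this binding's API
mclient = mc.MemcachedClient.GetInstance(server_list_cfg, client_cfg)

img_value = mc.pyvector()
mclient.Get('images/000001.jpg', img_value)
img_bytes = mc.ConvertBuffer(img_value)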
Example #8
updown_pred = [updown_qent[ent['question_id']]['answer'] for ent in label]
import utils
utils.accuracy(gt, updown_pred)
RCN_pred = [RCN_qent[ent['question_id']] for ent in label]
utils.accuracy(gt, RCN_pred)
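utils.accuracy presumably scores the predicted answers against the ground truth; a minimal sketch of such a helper (assumed behavior, not the project's actual implementation):

def accuracy(gt, pred):
    # Fraction of predictions that match the ground-truth answers
    correct = sum(int(g == p) for g, p in zip(gt, pred))
    return correct / len(gt)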

#%%
from tqdm import tqdm
import utils
import os
import numpy as np
import matplotlib.pyplot as plt
#%%
entdis = []
for ent in tqdm(testset):
    qid = ent['question_id']
    if RCN_qent[qid] != int(updown_qent[qid]['answer']):
        if RCN_qent[qid] != ent['answer'] and ent['data_source'] == 'amt':
            if int(updown_qent[qid]['answer']) == ent['answer']:
                ent['disagree'] = 'RCN = {} updown = {}'.format(
                    RCN_qent[qid], int(updown_qent[qid]['answer']))
                entdis.append(ent)
                path = os.path.join('/home/manoj/', ent['image'])
                a = utils.pil_loader(path)
                plt.imshow(np.asarray(a))
                plt.title("{} {}".format(ent['question'], ent['answer']))
                plt.xlabel(ent['disagree'])
                plt.ylabel(ent['question_id'])
                fname = os.path.basename(ent['image'])
                plt.savefig("disagg/" + fname, dpi=150)
                plt.close()
Example #9
def generate_caption_visualization(encoder,
                                   decoder,
                                   img_path,
                                   word_dict,
                                   beam_size=3):
    '''
    Visualize the step-by-step construction of the caption together with the
    attention map for each predicted word.

    Arguments:
        encoder: Instance of the trained Encoder for encoding images
        decoder: Instance of the trained Decoder for predicting a caption from the encoded image
        img_path (str): Full path of the image to be visualized
        word_dict (dict): Dictionary of words (vocabulary)
        beam_size (int): Number of top candidates to consider for beam search. Default = 3
    '''

    # Load the image and transform it
    img = pil_loader(img_path)
    img = data_transforms(img)
    img = torch.FloatTensor(img)
    img = img.unsqueeze(0)

    # Get the caption and the corresponding attention weights from the trained network
    img_features = encoder(img)
    img_features = img_features.expand(beam_size, img_features.size(1),
                                       img_features.size(2))
    sentence, alpha = decoder.caption(img_features, beam_size)

    # Using the dictionary, convert the encoded caption to normal words
    token_dict = {idx: word for word, idx in word_dict.items()}
    sentence_tokens = []
    for word_idx in sentence:
        sentence_tokens.append(token_dict[word_idx])
        if word_idx == word_dict['<eos>']:
            break

    # Resizing image for a standard display
    img = Image.open(img_path)
    w, h = img.size
    if w > h:
        w = w * 256 / h
        h = 256
    else:
        h = h * 256 / w
        w = 256
    left = (w - 224) / 2
    top = (h - 224) / 2
    resized_img = img.resize((int(w), int(h)), Image.BICUBIC).crop(
        (left, top, left + 224, top + 224))
    img = np.array(resized_img.convert('RGB').getdata()).reshape(224, 224, 3)
    img = img.astype('float32') / 255

    num_words = len(sentence_tokens)
    alpha = torch.tensor(alpha)

    # Plot each attention-weighted version of the image alongside the predicted caption word
    f = plt.figure(figsize=(8, 9))
    plot_height = ceil((num_words + 3) / 4.0)
    ax1 = f.add_subplot(4, plot_height, 1)
    plt.imshow(img)
    plt.axis('off')
    for idx in range(num_words):
        ax2 = f.add_subplot(4, plot_height, idx + 2)
        label = sentence_tokens[idx]
        plt.text(0, 1, label, backgroundcolor='white', fontsize=13)
        plt.text(0, 1, label, color='black', fontsize=13)
        plt.imshow(img)

        # The attention grid is 14x14 for VGG19 features and 7x7 otherwise;
        # upsample it to match the 224x224 display image
        if encoder.network == 'vgg19':
            shape_size = 14
        else:
            shape_size = 7

        alpha_img = skimage.transform.pyramid_expand(
            alpha[idx, :].reshape(shape_size, shape_size),
            upscale=224 // shape_size,
            sigma=20)

        plt.imshow(alpha_img, alpha=0.8)
        plt.set_cmap(cm.Greys_r)
        plt.axis('off')
    plt.show()
Example #10
def generate_image_caption(encoder,
                           decoder,
                           img_path,
                           word_dict,
                           beam_size=3,
                           ax=plt):
    '''
    Display the image along with its predicted caption.

    Arguments:
        encoder: Instance of the trained Encoder for encoding images
        decoder: Instance of the trained Decoder for predicting a caption from the encoded image
        img_path (str): Full path of the image to be visualized
        word_dict (dict): Dictionary of words (vocabulary)
        beam_size (int): Number of top candidates to consider for beam search. Default = 3
        ax: Axes to plot on (defaults to pyplot)
    '''

    # Load the image and transform it
    img = pil_loader(img_path)
    img = data_transforms(img)
    img = torch.FloatTensor(img)
    img = img.unsqueeze(0)

    # Get the caption from the trained network
    img_features = encoder(img)
    img_features = img_features.expand(beam_size, img_features.size(1),
                                       img_features.size(2))
    sentence, _ = decoder.caption(img_features, beam_size)

    # Using the dictionary, convert the encoded caption to normal words
    token_dict = {idx: word for word, idx in word_dict.items()}
    sentence_tokens = []
    for word_idx in sentence:
        if word_idx == word_dict['<start>']:
            continue
        if word_idx == word_dict['<eos>']:
            break
        sentence_tokens.append(token_dict[word_idx])

    # Resizing image for a standard display
    img = Image.open(img_path)
    w, h = img.size
    if w > h:
        w = w * 256 / h
        h = 256
    else:
        h = h * 256 / w
        w = 256
    left = (w - 224) / 2
    top = (h - 224) / 2
    resized_img = img.resize((int(w), int(h)), Image.BICUBIC).crop(
        (left, top, left + 224, top + 224))
    img = np.array(resized_img.convert('RGB').getdata()).reshape(224, 224, 3)
    img = img.astype('float32') / 255

    # Join the words into a sentence ending with a full stop
    caption = ' '.join(sentence_tokens) + '.'

    ax.imshow(img)
    ax.set_title(caption.capitalize())
    ax.axis('off')
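A usage sketch tying the two captioning helpers together (the Encoder/Decoder constructors and the image path are hypothetical stand-ins for the project's own):

import torch

encoder = Encoder(network='vgg19')                 # hypothetical constructor
decoder = Decoder(vocabulary_size=len(word_dict))  # hypothetical constructor
encoder.eval()
decoder.eval()

with torch.no_grad():
    generate_image_caption(encoder, decoder, 'images/surfer.jpg', word_dict, beam_size=3)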
Example #11
def get_image(path):
    # Load an image, apply the training transforms, and add a batch dimension
    im = pil_loader(path)
    im = im.convert("RGB")
    im = transform_train(im)
    im = im.unsqueeze(0)
    return im
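A usage sketch (model and the path are placeholders; transform_train must match the transforms the model was trained with):

im = get_image('images/cat.jpg')  # tensor of shape (1, C, H, W)
with torch.no_grad():
    logits = model(im)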