Example #1
    def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_names=None,
                  class_map=None, return_coco=False, auto_download=False, total_images=None):
        """Load a subset of the COCO dataset.
        dataset_dir: The root directory of the COCO dataset.
        subset: What to load (train, val, minival, valminusminival)
        year: What dataset year to load (2014, 2017) as a string, not an integer
        class_names: If provided, only loads images that contain the given classes.
        class_map: TODO: Not implemented yet. Supports mapping classes from
            different datasets to the same class ID.
        return_coco: If True, returns the COCO object.
        auto_download: Automatically download and unzip MS-COCO images and annotations
        total_images: If provided, caps the number of loaded images, split evenly
            across the selected classes.
        """
        dataset_dir = os.path.join(dataset_dir, "coco")

        if auto_download is True:
            self.auto_download(dataset_dir, subset, year)

        coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
        if subset == "minival" or subset == "valminusminival":
            subset = "val"
        image_dir = "{}/{}{}".format(dataset_dir, subset, year)

        # All classes
        class_ids = sorted(coco.getCatIds())

        # Only subset of classes
        if class_names:
            class_ids = list(filter(lambda id: coco.loadCats(id)[0]["name"] in class_names, class_ids))

        images_per_class = sys.maxsize if total_images is None or total_images <= 0 else int(
            total_images // len(class_ids))

        # All images or a subset?
        if class_ids:
            image_ids = []
            for id in class_ids:
                images = list(coco.getImgIds(catIds=[id]))
                image_ids.extend(images if len(images) < images_per_class else images[:images_per_class])
            # Remove duplicates
            image_ids = list(set(image_ids))
        else:
            # All images
            image_ids = list(coco.imgs.keys())

        # Add classes
        for i in class_ids:
            self.add_class("coco", i, coco.loadCats(i)[0]["name"])

        # Add images
        for i in image_ids:
            self.add_image(
                "coco", image_id=i,
                path=os.path.join(image_dir, coco.imgs[i]['file_name']),
                width=coco.imgs[i]["width"],
                height=coco.imgs[i]["height"],
                annotations=coco.loadAnns(coco.getAnnIds(
                    imgIds=[i], catIds=class_ids, iscrowd=None)))
        if return_coco:
            return coco
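A minimal usage sketch for the loader above, assuming the method lives on a class (here called CocoDataset) that follows the Mask R-CNN utils.Dataset interface with add_class, add_image, and prepare; the paths, class names, and image cap are illustrative:

dataset = CocoDataset()
# Load only images containing "person" or "car" from COCO train 2017,
# capped at roughly 2000 images split evenly across the two classes.
dataset.load_coco("/data", subset="train", year="2017",
                  class_names=["person", "car"], total_images=2000)
dataset.prepare()  # build the internal class/image indices
print("Loaded %d images" % len(dataset.image_ids))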
Example #2
def build_vocab(json, threshold):
    """Build a simple vocabulary wrapper."""
    coco = COCO(json)
    counter = Counter()
    ids = coco.anns.keys()
    for i, id in enumerate(ids):
        caption = str(coco.anns[id]['caption'])
        tokens = nltk.tokenize.word_tokenize(caption.lower())
        counter.update(tokens)

        if i % 1000 == 0:
            print("[%d/%d] Tokenized the captions." % (i, len(ids)))

    # If the word frequency is less than 'threshold', then the word is discarded.
    words = [word for word, cnt in counter.items() if cnt >= threshold]

    # Create a vocabulary wrapper and add the special tokens.
    vocab = Vocabulary()
    vocab.add_word('<pad>')
    vocab.add_word('<start>')
    vocab.add_word('<end>')
    vocab.add_word('<unk>')

    # Add the remaining words to the vocabulary.
    for word in words:
        vocab.add_word(word)
    return vocab
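A short usage sketch for build_vocab, e.g. from a preprocessing script; the annotation path, threshold, and output file are illustrative, and len(vocab) assumes the Vocabulary wrapper implements __len__:

import pickle

vocab = build_vocab(json='data/annotations/captions_train2014.json', threshold=4)
with open('data/vocab.pkl', 'wb') as f:
    pickle.dump(vocab, f)  # reuse the same wrapper at training and test time
print("Total vocabulary size: %d" % len(vocab))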
Example #3
    def __init__(self, root, json, vocab, transform=None):
        """Set the path for images, captions and vocabulary wrapper.

        Args:
            root: image directory.
            json: coco annotation file path.
            vocab: vocabulary wrapper.
            transform: image transformer.
        """

        self.root = root
        self.coco = COCO(json)
        self.ids = list(self.coco.anns.keys())
        self.vocab = vocab
        self.transform = transform
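Example #3 only shows the constructor. The sketch below fills in the companion __getitem__ and __len__ that such a caption dataset typically provides, assuming PIL.Image, nltk, torch, and os are imported at module level and that the vocabulary wrapper maps a token to its integer id when called:

    def __getitem__(self, index):
        # Return one (image, caption) pair for the given annotation index.
        ann_id = self.ids[index]
        caption = self.coco.anns[ann_id]['caption']
        img_id = self.coco.anns[ann_id]['image_id']
        path = self.coco.loadImgs(img_id)[0]['file_name']

        image = Image.open(os.path.join(self.root, path)).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)

        # Convert the caption string to word ids framed by <start>/<end>.
        tokens = nltk.tokenize.word_tokenize(str(caption).lower())
        caption_ids = [self.vocab('<start>')]
        caption_ids.extend(self.vocab(token) for token in tokens)
        caption_ids.append(self.vocab('<end>'))
        return image, torch.Tensor(caption_ids)

    def __len__(self):
        return len(self.ids)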
Example #4
def coco_eval(model, args, epoch):
    '''
    model: trained model to be evaluated
    args: pre-set parameters
    epoch: epoch number, for display purposes
    '''

    model.eval()

    # Resize validation images to the model's expected input size
    # (transforms.Resize supersedes the deprecated transforms.Scale)
    transform = transforms.Compose([
        transforms.Resize((args.crop_size, args.crop_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load the vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Wrap the COCO validation dataset
    eval_data_loader = torch.utils.data.DataLoader(
        CocoEvalLoader(args.image_dir, args.caption_val_path, transform),
        batch_size=args.eval_size,
        shuffle=False,
        num_workers=args.num_workers,
        drop_last=False)

    # Generated captions to be compared with GT
    results = []
    print '---------------------Start evaluation on MS-COCO dataset-----------------------'
    for i, (images, image_ids, _) in enumerate(eval_data_loader):

        images = to_var(images)
        generated_captions, _, _ = model.sampler(images)

        if torch.cuda.is_available():
            captions = generated_captions.cpu().data.numpy()
        else:
            captions = generated_captions.data.numpy()

        # Build caption based on Vocabulary and the '<end>' token
        for image_idx in range(captions.shape[0]):

            sampled_ids = captions[image_idx]
            sampled_caption = []

            for word_id in sampled_ids:

                word = vocab.idx2word[word_id]
                if word == '<end>':
                    break
                else:
                    sampled_caption.append(word)

            sentence = ' '.join(sampled_caption)

            temp = {'image_id': int(image_ids[image_idx]), 'caption': sentence}
            results.append(temp)

        # Display evaluation progress
        if (i + 1) % 10 == 0:
            print '[%d/%d]' % ((i + 1), len(eval_data_loader))

    print '------------------------Caption Generated-------------------------------------'

    # Evaluate the results based on the COCO API
    resFile = 'results/mixed-' + str(epoch) + '.json'
    json.dump(results, open(resFile, 'w'))

    annFile = args.caption_val_path
    coco = COCO(annFile)
    cocoRes = coco.loadRes(resFile)

    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # Get CIDEr score for validation evaluation
    cider = 0.
    print '-----------Evaluation performance on MS-COCO validation dataset for Epoch %d----------' % (
        epoch)
    for metric, score in cocoEval.eval.items():

        print '%s: %.4f' % (metric, score)
        if metric == 'CIDEr':
            cider = score

    return cider
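A hedged usage sketch: coco_eval returns the CIDEr score, so it can be called after every training epoch to keep the best checkpoint; train_one_epoch and the checkpoint path are illustrative names, not part of the original code:

best_cider = 0.
for epoch in range(1, args.num_epochs + 1):
    train_one_epoch(model, args, epoch)   # hypothetical training step
    cider = coco_eval(model, args, epoch)
    if cider > best_cider:
        best_cider = cider
        torch.save(model.state_dict(), 'models/best-%d.pth' % epoch)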
Example #5
def main( args ):
    
    '''
    args: parameters, including the path of the trained model to be evaluated (args.trained)
    '''
    # Load vocabulary wrapper.
    with open( args.vocab_path, 'rb') as f:
        vocab = pickle.load( f )
    # Load trained model
    model = Encoder2Decoder( args.embed_size, len(vocab), args.hidden_size )
    model.load_state_dict(torch.load(args.trained))

    # Change to GPU mode if available
    if torch.cuda.is_available():
        model.cuda()

    model.eval()
    
    transform = transforms.Compose([ 
        transforms.Resize( (args.crop_size, args.crop_size) ),
        transforms.ToTensor(), 
        transforms.Normalize((0.485, 0.456, 0.406), 
                             (0.229, 0.224, 0.225))])

    # Wrap the COCO validation dataset
    eval_data_loader = torch.utils.data.DataLoader( 
        CocoEvalLoader( args.image_dir, args.caption_test_path, args.topic_path, transform ),
        batch_size = args.eval_size, 
        shuffle = False, num_workers = args.num_workers,
        drop_last = False )  
    epoch = int( args.trained.split('/')[-1].split('-')[1].split('.')[0] )
    
    # Generated captions to be compared with GT
    results = []
    print '---------------------Start evaluation on MS-COCO dataset-----------------------'
    for i, (images, image_ids, _, T_val ) in enumerate( eval_data_loader ):
        
        images = to_var( images )
        T_val = to_var( T_val )
        generated_captions = model.sampler( epoch, images, T_val )

        if torch.cuda.is_available():
            captions = generated_captions.cpu().data.numpy()
        else:
            captions = generated_captions.data.numpy()

        # Build caption based on Vocabulary and the '<end>' token
        for image_idx in range( captions.shape[0] ):
            
            sampled_ids = captions[ image_idx ]
            sampled_caption = []
            
            for word_id in sampled_ids:
                
                word = vocab.idx2word[ word_id ]
                if word == '<end>':
                    break
                else:
                    sampled_caption.append( word )
            
            sentence = ' '.join( sampled_caption )
            
            temp = { 'image_id': int( image_ids[ image_idx ] ), 'caption': sentence}
            results.append( temp )
        
        # Display evaluation progress
        if (i+1) % 10 == 0:
            print '[%d/%d]'%( (i+1),len( eval_data_loader ) ) 

    print '------------------------Caption Generated-------------------------------------'
            
    # Evaluate the results based on the COCO API
    resFile = args.save_path
    json.dump( results, open( resFile , 'w' ) )
    
    annFile = args.caption_test_path
    coco = COCO( annFile )
    cocoRes = coco.loadRes( resFile )
    
    cocoEval = COCOEvalCap( coco, cocoRes )
    cocoEval.params['image_id'] = cocoRes.getImgIds() 
    cocoEval.evaluate()

    print '-----------Evaluation performance on MS-COCO dataset----------'
    for metric, score in cocoEval.eval.items():
        print '%s: %.4f'%( metric, score )
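Side note on the epoch parsing in this example: the epoch number is recovered from the checkpoint file name. A small self-contained illustration (the path is hypothetical; os.path.basename keeps the parse independent of the directory layout):

import os

trained = 'models/adaptive-12.pkl'   # hypothetical checkpoint path
epoch = int(os.path.splitext(os.path.basename(trained))[0].split('-')[1])
assert epoch == 12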
Example #6
def main(args):

    ################################################################################
    # Validate input arguments
    ################################################################################
    assert not (
        args.concat and (not args.multicrop)
    ), "Cannot test concatenated labels on single image crop per batch."
    assert not (args.classes and args.concat
                ), "Cannot test concatenated labels when using image classes"
    assert not (
        args.classes and (not args.multicrop)
    ), "Cannot test on single image per batch when using image classes"

    # Initialize GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.GPU_ID

    # Print the run configuration
    print()
    print("Model:", pretrained_model)
    print("All crops per batch - True | First crop per batch - False:",
          args.multicrop)
    print("Concatenated captions - True | Simple captions - False:",
          args.concat)
    print("Image Classes - True | Image Descriptions - False:", args.classes)
    print()

    ################################################################################
    # Evaluation network
    ################################################################################

    # Inputs
    text_seq_batch = tf.placeholder(tf.int32, [T, N])
    imcrop_batch = tf.placeholder(tf.float32, [N, 224, 224, 3])
    lstm_top_batch = tf.placeholder(tf.float32, [N, D_text])
    fc8_crop_batch = tf.placeholder(tf.float32, [N, D_im])

    # Language feature (LSTM hidden state)
    lstm_top = lstm_net.lstm_net(text_seq_batch, num_vocab, embed_dim,
                                 lstm_dim)

    # Local image feature
    fc8_crop = vgg_net.vgg_fc8(imcrop_batch, 'vgg_local', apply_dropout=False)

    # L2-normalize the features (except for spatial_batch)
    # and concatenate them along axis 1 (feature dimension)
    feat_all = tf.concat(axis=1,
                         values=[
                             tf.nn.l2_normalize(lstm_top_batch, 1),
                             tf.nn.l2_normalize(fc8_crop_batch, 1)
                         ])

    # Outputs
    # MLP Classifier over concatenate feature
    with tf.variable_scope('classifier'):
        mlp_l1 = fc_relu('mlp_l1', feat_all, output_dim=mlp_hidden_dims)
        mlp_l2 = fc('mlp_l2', mlp_l1, output_dim=1)
    scores = mlp_l2

    # Load pretrained model
    snapshot_restorer = tf.train.Saver(None)
    sess = tf.Session()
    snapshot_restorer.restore(sess, pretrained_model)

    ################################################################################
    # Load annotations and bounding box proposals
    ################################################################################

    coco = COCO(query_file)
    coco_captions = COCO(caption_file)
    imgid_list = coco.getImgIds()
    catid_list = coco.getCatIds()

    ################################################################################
    # Load testing data
    ################################################################################

    testing_samples_pos = []
    testing_samples_neg = []
    num_imcrop = len(imgid_list)

    # Gather a testing example per full image.
    for n_imcrop in range(num_imcrop):
        # image
        img_id = imgid_list[n_imcrop]

        # get the descriptions of the image
        caption_ids = coco_captions.getAnnIds(imgIds=img_id)
        captions = [
            x['caption'].strip() for x in coco_captions.loadAnns(caption_ids)
        ]

        if args.concat:
            # append two positive captions; one with itself if only one present
            pos_desc = captions[0] + ' and ' + captions[len(captions) - 1]
            testing_samples_pos.append((img_id, pos_desc, 1))

            # form negative examples by choosing random image
            # that is not the current image, get its descriptions,
            # and choose one at random.
            false_idx = n_imcrop
            while false_idx == n_imcrop:
                false_idx = randint(0, num_imcrop - 1)
            desc_ids = coco_captions.getAnnIds(imgid_list[false_idx])
            desc_idx = randint(0, len(desc_ids) - 1)
            neg_desc1 = coco_captions.loadAnns(
                desc_ids[desc_idx])[0]['caption'].strip()

            false_idx = n_imcrop
            while false_idx == n_imcrop:
                false_idx = randint(0, num_imcrop - 1)
            desc_ids = coco_captions.getAnnIds(imgid_list[false_idx])
            desc_idx = randint(0, len(desc_ids) - 1)
            neg_desc2 = coco_captions.loadAnns(
                desc_ids[desc_idx])[0]['caption'].strip()

            # negative example: append two negative captions
            neg_desc = neg_desc1 + ' and ' + neg_desc2
            testing_samples_neg.append((img_id, neg_desc, 0))

            # negative example: append one negative and one positive example
            neg_desc = neg_desc1 + ' and ' + captions[0].strip()
            testing_samples_neg.append((img_id, neg_desc, 0))
            neg_desc = captions[0].strip() + ' and ' + neg_desc1
            testing_samples_neg.append((img_id, neg_desc, 0))

        # for appending image captions
        elif args.classes:
            img_catids = coco.getCatIds(imgIds=img_id)
            img_cat_names = [cat['name'] for cat in coco.loadCats(img_catids)]
            for category in img_cat_names:
                testing_samples_pos.append((img_id, category, 1))

                # form one negative example by choosing random category that
                # img is not in
                false_catid = img_catids[0]
                while false_catid in img_catids:
                    false_catid = catid_list[randint(0, len(catid_list) - 1)]
                false_cat_name = coco.loadCats(false_catid)[0]['name']
                testing_samples_neg.append((img_id, false_cat_name, 0))

        else:
            for caption in captions:
                # append one positive sample per description
                testing_samples_pos.append((img_id, caption, 1))

                # form one negative example by choosing random image
                # that is not the current image, get its descriptions,
                # and choose one at random.
                false_idx = n_imcrop
                while false_idx == n_imcrop:
                    false_idx = randint(0, num_imcrop - 1)
                desc_ids = coco_captions.getAnnIds(imgid_list[false_idx])
                desc_idx = randint(0, len(desc_ids) - 1)
                false_cap = coco_captions.loadAnns(
                    desc_ids[desc_idx])[0]['caption'].strip()

                testing_samples_neg.append((img_id, false_cap, 0))

    # Combine samples
    print('#pos=', len(testing_samples_pos))
    print('#neg=', len(testing_samples_neg))

    # TODO: Not exactly sure what your multicrop is testing here? Just removes the
    # positive examples from being tested? How is this useful?
    if args.multicrop:
        testing_samples = testing_samples_pos + testing_samples_neg
    else:
        testing_samples = testing_samples_neg

    print('#total testing examples=', len(testing_samples))
    num_batch = len(testing_samples) // N
    print('total batch number: %d' % num_batch)

    ################################################################################
    # Testing
    ################################################################################

    # Pre-allocate arrays
    imcrop_val = np.zeros((N, 224, 224, 3), dtype=np.float32)
    text_seq_val = np.zeros((T, N), dtype=np.int32)
    lstm_top_val = np.zeros((N, D_text))
    label_val = np.zeros((N, 1), dtype=bool)  # np.bool is removed in newer NumPy

    correct_predictions = 0
    total_predictions = 0

    # optimization for faster image loading
    last_img_id = -100
    last_imcrop = None

    for n_batch in range(num_batch):
        print('batch %d / %d' % (n_batch + 1, num_batch))
        batch_begin = n_batch * N
        batch_end = (n_batch + 1) * N

        # load and preprocess last image from previous batch
        first_img_id = testing_samples[max(batch_begin - 1, 0)][0]
        first_imname = coco.loadImgs(first_img_id)[0]['coco_url']
        first_im = skimage.io.imread(first_imname)
        first_imcrop = skimage.img_as_ubyte(
            skimage.transform.resize(first_im, [224, 224]))
        if len(np.shape(first_im)) != 3: continue

        for n_sample in range(batch_begin, batch_end):
            img_id, description, label = testing_samples[n_sample]

            # Preprocess image and caption
            if args.multicrop:
                # Optimization: do not reload image if it is the same as the last one
                if img_id == last_img_id:
                    imcrop = last_imcrop
                else:
                    imname = coco.loadImgs(img_id)[0]['coco_url']
                    im = skimage.io.imread(imname)

                    # ignore grayscale images
                    if len(np.shape(im)) != 3: continue

                    imcrop = skimage.img_as_ubyte(
                        skimage.transform.resize(im, [224, 224]))
                    last_img_id = img_id
                    last_imcrop = imcrop
            else:
                imcrop = first_imcrop
            text_seq = text_processing.preprocess_sentence(
                description, vocab_dict, T)

            # Form batch
            idx = n_sample - batch_begin
            text_seq_val[:, idx] = text_seq
            imcrop_val[idx, ...] = imcrop - vgg_net.channel_mean
            label_val[idx] = label

        # Extract visual feature
        fc8_crop_val = sess.run(fc8_crop, feed_dict={imcrop_batch: imcrop_val})

        # Extract language feature
        lstm_top_val[...] = sess.run(lstm_top,
                                     feed_dict={text_seq_batch: text_seq_val})

        # Compute scores per proposal
        scores_val = sess.run(scores,
                              feed_dict={
                                  lstm_top_batch: lstm_top_val,
                                  fc8_crop_batch: fc8_crop_val
                              })
        scores_val = scores_val[:batch_end - batch_begin + 1, ...].reshape(-1)

        # Evaluate on bounding labels
        for indx in range(len(scores_val)):
            correct_predictions += ((scores_val[indx] > 0) == label_val[indx])
            total_predictions += 1

        print("%d correct predictions out of %d" %
              (correct_predictions, total_predictions))
        print(correct_predictions / total_predictions)

    print('Final results on the whole test set')
    result_str = 'recall = %0.4f \n' % (float(correct_predictions) /
                                        total_predictions)
    print(result_str)
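A brief illustration of the decision rule used in the loop above: a raw classifier score (logit) greater than zero corresponds to a predicted match probability above 0.5 under a sigmoid, so scores_val[indx] > 0 is the standard binary decision boundary:

import numpy as np

def predict_match(logit):
    prob = 1.0 / (1.0 + np.exp(-logit))   # sigmoid of the raw score
    return prob > 0.5                     # same decision as logit > 0

assert predict_match(1.3) == (1.3 > 0)
assert predict_match(-0.7) == (-0.7 > 0)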
Example #7
File: Test.py Project: lancopku/MIA
def main(args):
    '''
    args: parameters, including the directory of the trained checkpoints to be evaluated
    '''
    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Create results directory
    if not os.path.exists(os.path.join(args.result_path, args.basic_model)):
        os.makedirs(os.path.join(args.result_path, args.basic_model))

    # List and sort all checkpoints in the storage directory
    if args.use_MIA:
        checkpoint_dir = os.path.join(args.save_dir_path,
                                      args.basic_model + "-MIA")
    else:
        checkpoint_dir = os.path.join(args.save_dir_path, args.basic_model)

    checkpoint_list = os.listdir(checkpoint_dir)
    checkpoint_list.sort()

    # Load Caption Model
    for checkpoint in checkpoint_list:
        checkpoint_path = os.path.join(checkpoint_dir, checkpoint)
        Caption_Generator = Generator(args, checkpoint_path, len(vocab))

        transform = transforms.Compose([
            transforms.Resize((args.crop_size, args.crop_size)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

        # Wrap the COCO validation dataset
        eval_data_loader = torch.utils.data.DataLoader(
            CocoEvalLoader(args.image_dir, args.caption_test_path,
                           args.concept_path, vocab, transform),
            batch_size=args.eval_batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            drop_last=False)

        epoch = int(checkpoint.split('-')[1].split('.')[0])

        # Generated captions to be compared with GT
        results = []

        print '---------------------Start evaluation on MS-COCO dataset-----------------------'
        for i, (images, image_concepts, image_ids,
                _) in enumerate(eval_data_loader):
            images = to_var(images)
            image_concepts = to_var(image_concepts)
            all_hyp, all_scores = Caption_Generator.translate_batch(
                images, image_concepts)

            # Build caption based on Vocabulary and the '<end>' token
            for image_idx in range(len(all_hyp)):

                all_sentence = []
                for num_i in range(args.n_best):
                    sampled_ids = all_hyp[image_idx][num_i]
                    sampled_caption = []

                    for word_id in sampled_ids:

                        word = vocab.idx2word[word_id]
                        if word == '<end>':
                            break
                        else:
                            sampled_caption.append(word)

                    sentence = ' '.join(sampled_caption)
                    all_sentence.append(sentence)

                best_sentence = all_sentence[0]
                temp = {
                    'image_id': int(image_ids[image_idx]),
                    'caption': best_sentence
                }
                results.append(temp)

            # Display evaluation progress
            if (i + 1) % (1000 / args.eval_batch_size) == 0:
                print '[%d/%d]' % ((i + 1), len(eval_data_loader))

        print '------------------------Caption Generated-------------------------------------'

        # Evaluate the results based on the COCO API
        resFile = os.path.join(args.result_path, args.basic_model,
                               'Caption-%d.json' % (epoch))
        json.dump(results, open(resFile, 'w'))

        annFile = args.caption_test_path
        coco = COCO(annFile)
        cocoRes = coco.loadRes(resFile)

        cocoEval = COCOEvalCap(coco, cocoRes)
        cocoEval.params['image_id'] = cocoRes.getImgIds()
        cocoEval.evaluate()

        print '-----------Evaluation performance on MS-COCO dataset----------'

        if args.use_MIA:
            save_file = args.save_score_file + '-' + args.basic_model + "-MIA"
        else:
            save_file = args.save_score_file + '-' + args.basic_model

        f = open(save_file, 'a')

        f.write('\n The evaluation scores about epoch %d are: \n' % (epoch))
        for metric, score in cocoEval.eval.items():
            f.write('\n%s: %.4f\n' % (metric, score))
            print '%s: %.4f' % (metric, score)

        f.close()
Example #8
data_prefix = 'coco_train_cls'

# Model Params
T = 20
N = 10  # number of items per batch
input_H = 224
input_W = 224

# num false samples per positive sample
F = 1

################################################################################
# Load annotations
################################################################################

coco = COCO(query_file)
coco_captions = COCO(caption_file)
imgid_list = coco.getImgIds()

################################################################################
# Load vocabulary
################################################################################

if sys.argv[1] == "glove":
    filename = './exp-referit/data/glove.6B.50d.txt'

    def loadGloVe(filename):
        vocab = []
        embd = []
        file = open(filename, 'r')
        for line in file.readlines():