# Compute descriptors for both object images and distractor images
image_path_list = [image_dir+imname+'.JPEG' for imname in imlist]
distractor_path_list = [distractor_dir+imname+'.JPEG' for imname in distractor_list]

obj_descriptors = captioner.compute_descriptors(image_path_list)
dis_descriptors = captioner.compute_descriptors(distractor_path_list)
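# compute_descriptors returns a 2-D array with one descriptor row per input
# image, so individual objects and distractors can be selected by row below.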

################################################################################
# Test top-1 precision
correct_num = 0
total_num = 0
for n_im in range(num_im):
    print('testing image %d / %d' % (n_im, num_im))
    imname = imlist[n_im]
    for sentence in query_dict[imname]:
        # compute test image (target object) score given the description sentence
        obj_score = retriever.score_descriptors(obj_descriptors[n_im:n_im+1, :],
                                                sentence, captioner, vocab_dict)[0]
        # compute distractor scores given the description sentence
        dis_idx = distractor_ids_per_im[imname]
        dis_scores = retriever.score_descriptors(dis_descriptors[dis_idx, :],
                                                 sentence, captioner, vocab_dict)

        # for a retrieval to be correct, the object image must score higher than
        # all distractor images
        correct_num += np.all(obj_score > dis_scores)
        total_num += 1

# float cast keeps the division correct under Python 2 as well
print('Top-1 precision on the whole test set: %f' % (correct_num / float(total_num)))
################################################################################
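
# An equivalent way to read the top-1 check above is as a rank test: retrieval
# counts as correct iff the target image ranks first among itself and its
# distractors. A minimal illustrative sketch (hypothetical helper, not used by
# the evaluation loop above):
def _top1_correct(obj_score, dis_scores):
    # rank 1 means no distractor scores at least as high as the target
    rank = 1 + np.sum(dis_scores >= obj_score)
    return rank == 1
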
################################################################################
# Test top-K recall: for every annotated image crop, score all candidate boxes
# against each query sentence and check whether any of the top K scoring boxes
# overlaps the ground-truth box with IoU >= correct_IoU_threshold
topK_correct_num = np.zeros(K, dtype=np.float32)
total_num = 0
for n_im in range(num_im):
    print('testing image %d / %d' % (n_im, num_im))
    imname = imlist[n_im]
    # imcrop_names, candidate_boxes and the candidate-box descriptors are
    # assumed to have been computed for the current image at this point
    num_imcrop = len(imcrop_names)
    num_proposal = candidate_boxes.shape[0]
    for n_imcrop in range(num_imcrop):
        imcrop_name = imcrop_names[n_imcrop]
        if imcrop_name not in query_dict:
            continue
        gt_bbox = np.array(imcrop_bbox_dict[imcrop_name])
        IoUs = retriever.compute_iou(candidate_boxes, gt_bbox)
        for n_sentence in range(len(query_dict[imcrop_name])):
            sentence = query_dict[imcrop_name][n_sentence]
            # scores for each candidate region given the description sentence
            if use_context:
                scores = retriever.score_descriptors_context(
                    descriptors, sentence, context_features_dict[imname],
                    captioner, vocab_dict)
            else:
                scores = retriever.score_descriptors(descriptors, sentence,
                                                     captioner, vocab_dict)

            # Evaluate the correctness of the top K predictions
            topK_ids = np.argsort(-scores)[:K]
            topK_IoUs = IoUs[topK_ids]
            # whether the k-th candidate (ranked from high to low) is correct
            topK_is_correct = np.zeros(K, dtype=bool)
            topK_is_correct[:len(topK_ids)] = (topK_IoUs >= correct_IoU_threshold)
            # whether at least one of the top k candidates is correct
            topK_any_correct = (np.cumsum(topK_is_correct) > 0)
            topK_correct_num += topK_any_correct
            total_num += 1

    # print intermediate results during testing
    if (n_im + 1) % 1000 == 0:
        print('Recall on first %d test images:' % (n_im + 1))
        for k in (1, K):
            print('\trecall @ %d = %f' % (k, topK_correct_num[k - 1] / total_num))

print('Recall on the whole test set:')
for k in (1, K):
    print('\trecall @ %d = %f' % (k, topK_correct_num[k - 1] / total_num))
################################################################################
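
# For reference, the correctness check in the recall test is driven by the IoU
# between each candidate box and the ground-truth box. A minimal sketch of how
# IoU can be computed for [x1, y1, x2, y2] boxes (illustrative only; the script
# itself relies on retriever.compute_iou, whose exact conventions may differ):
def _iou_example(boxes, gt_bbox):
    # intersection rectangle between each candidate box and the ground truth
    x1 = np.maximum(boxes[:, 0], gt_bbox[0])
    y1 = np.maximum(boxes[:, 1], gt_bbox[1])
    x2 = np.minimum(boxes[:, 2], gt_bbox[2])
    y2 = np.minimum(boxes[:, 3], gt_bbox[3])
    inter = np.maximum(0.0, x2 - x1) * np.maximum(0.0, y2 - y1)
    # union = sum of the two areas minus the intersection
    area_boxes = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    area_gt = (gt_bbox[2] - gt_bbox[0]) * (gt_bbox[3] - gt_bbox[1])
    return inter / (area_boxes + area_gt - inter)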
