# Compute descriptors for both object images and distractor images
image_path_list = [image_dir + imname + '.JPEG' for imname in imlist]
distractor_path_list = [distractor_dir + imname + '.JPEG'
                        for imname in distractor_list]
obj_descriptors = captioner.compute_descriptors(image_path_list)
dis_descriptors = captioner.compute_descriptors(distractor_path_list)

################################################################################

# Test top-1 precision
correct_num = 0
total_num = 0
for n_im in range(num_im):
    print('testing image %d / %d' % (n_im, num_im))
    imname = imlist[n_im]
    for sentence in query_dict[imname]:
        # compute the test image (target object) score given the
        # description sentence
        obj_score = retriever.score_descriptors(
            obj_descriptors[n_im:n_im+1, :], sentence, captioner,
            vocab_dict)[0]
        # compute distractor scores given the same description sentence
        dis_idx = distractor_ids_per_im[imname]
        dis_scores = retriever.score_descriptors(
            dis_descriptors[dis_idx, :], sentence, captioner, vocab_dict)
        # for a retrieval to be correct, the object image must score higher
        # than all distractor images
        correct_num += np.all(obj_score > dis_scores)
        total_num += 1

print('Top-1 precision on the whole test set: %f' % (correct_num / total_num))

################################################################################
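# Illustration (not part of the original script): a minimal, self-contained
# sketch of the top-1 decision rule used above. A query counts as correct only
# when the target image outscores *every* distractor; a tie with even one
# distractor makes it incorrect. The toy_* names and scores below are made up
# for illustration; real scores come from retriever.score_descriptors.
def _toy_top1_example():
    toy_obj_score = -2.1                           # target image's sentence score
    toy_dis_scores = np.array([-3.0, -2.5, -2.1])  # distractor scores (note the tie)
    # False here because of the tied distractor, so this retrieval would
    # not be added to correct_num
    return np.all(toy_obj_score > toy_dis_scores)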
# Test top-K recall over candidate boxes (n_im, imname, descriptors and
# candidate_boxes come from the enclosing loop over test images)
num_imcrop = len(imcrop_names)
num_proposal = candidate_boxes.shape[0]
for n_imcrop in range(num_imcrop):
    imcrop_name = imcrop_names[n_imcrop]
    if imcrop_name not in query_dict:
        continue
    gt_bbox = np.array(imcrop_bbox_dict[imcrop_name])
    IoUs = retriever.compute_iou(candidate_boxes, gt_bbox)
    for n_sentence in range(len(query_dict[imcrop_name])):
        sentence = query_dict[imcrop_name][n_sentence]
        # scores for each candidate region
        if use_context:
            scores = retriever.score_descriptors_context(
                descriptors, sentence, context_features_dict[imname],
                captioner, vocab_dict)
        else:
            scores = retriever.score_descriptors(
                descriptors, sentence, captioner, vocab_dict)
        # evaluate the correctness of the top K predictions
        topK_ids = np.argsort(-scores)[:K]
        topK_IoUs = IoUs[topK_ids]
        # whether the k-th candidate (ranked from highest to lowest score)
        # is correct
        topK_is_correct = np.zeros(K, dtype=bool)
        topK_is_correct[:len(topK_ids)] = (topK_IoUs >= correct_IoU_threshold)
        # whether at least one of the top k candidates is correct
        topK_any_correct = (np.cumsum(topK_is_correct) > 0)
        topK_correct_num += topK_any_correct
        total_num += 1

# print intermediate results during testing
if (n_im+1) % 1000 == 0:
    print('Recall on first %d test images' % (n_im+1))
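# Illustration (assumption, not the repo's implementation): compute_iou above
# is assumed to return the standard intersection-over-union between each
# candidate box and the ground-truth box, with boxes as [x1, y1, x2, y2] and a
# pixel-inclusive (+1) area convention. A minimal sketch under that assumption;
# iou_xyxy is a hypothetical helper, not part of the retriever module:
def iou_xyxy(boxes, gt_box):
    # intersection rectangle of each candidate box with the ground-truth box
    ix1 = np.maximum(boxes[:, 0], gt_box[0])
    iy1 = np.maximum(boxes[:, 1], gt_box[1])
    ix2 = np.minimum(boxes[:, 2], gt_box[2])
    iy2 = np.minimum(boxes[:, 3], gt_box[3])
    iw = np.maximum(ix2 - ix1 + 1, 0)
    ih = np.maximum(iy2 - iy1 + 1, 0)
    inter = iw * ih
    area_boxes = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    area_gt = (gt_box[2] - gt_box[0] + 1) * (gt_box[3] - gt_box[1] + 1)
    # IoU = intersection / union, computed in floating point
    return inter / (area_boxes + area_gt - inter).astype(np.float64)

# Note on the cumsum trick above: np.cumsum(topK_is_correct) > 0 turns
# per-rank correctness into recall@k for every k at once, since it is True at
# rank k iff any of the top k candidates clears the IoU threshold. Toy example
# with made-up flags:
#   topK_is_correct = [False, True, False] -> cumsum > 0 = [False, True, True]
# so this query contributes to recall@2 and recall@3 but not to recall@1.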