# Score pre-extracted region proposals against the text `query` on each of the
# 8 split images, and stack every proposal (tagged with its image index in
# column 0) into `sum_candidate_box`.
print("query =", query)
print("Find best candidate..!")

per_image_boxes = []
for img_idx in range(8):
    image_path = './splited_image/test' + str(img_idx) + '.jpg'
    # Pre-extracted EdgeBox / selective-search proposals for this image.
    proposal_path = './proposal_box/selective_box' + str(img_idx) + '.txt'

    im = skimage.io.imread(image_path)
    imsize = np.array([im.shape[1], im.shape[0]])  # [width, height]

    candidate_boxes = np.loadtxt(proposal_path).astype(int).reshape((-1, 4))

    # Per-proposal descriptors: local region feature concatenated with an
    # 8-D spatial feature; plus a whole-image context feature (fc7 layer).
    region_feature = retriever.compute_descriptors_edgebox(
        captioner, im, candidate_boxes)
    spatial_feature = retriever.compute_spatial_feat(candidate_boxes, imsize)
    descriptors = np.concatenate((region_feature, spatial_feature), axis=1)
    context_feature = captioner.compute_descriptors([im], output_name='fc7')

    # Score every candidate region for the query sentence.
    scores = retriever.score_descriptors_context(
        descriptors, query, context_feature, captioner, vocab_dict)

    # Prepend the image index so boxes stay traceable after stacking.
    candidate_boxes = np.insert(candidate_boxes, 0, img_idx, axis=1)
    per_image_boxes.append(candidate_boxes)

sum_candidate_box = np.vstack(per_image_boxes)
# Precompute and cache per-proposal features (local fc7 + spatial) for every
# test image as an .npz file, printing running recall numbers along the way.
# NOTE: the recall counters (topK_correct_num, total_num) are maintained
# outside this chunk.
sample_im = num_im
for idx in range(sample_im):
    print('testing image %d / %d' % (idx, num_im))
    imname = imlist[idx]
    # Ground-truth crop names for this image (looked up, not used below).
    imcrop_names = imcrop_dict[imname]
    candidate_boxes = candidate_boxes_dict[imname]

    im = skimage.io.imread(image_dir + imname + '.jpg')
    imsize = np.array([im.shape[1], im.shape[0]])  # [width, height]

    # Local descriptors: image feature (100, 4096) and spatial feature (100, 8).
    descriptors = retriever.compute_descriptors_edgebox(
        captioner, im, candidate_boxes, 'fc7')
    spatial_feats = retriever.compute_spatial_feat(candidate_boxes, imsize)

    np.savez('./data/ReferIt/referit_proposal_feature/' + imname,
             spatial_feat=spatial_feats, local_feature=descriptors)

    # Periodic progress report every 1000 images.
    if (idx + 1) % 1000 == 0:
        print('Recall on first %d test images' % (idx + 1))
        for rank in [0, 10 - 1]:
            print('\trecall @ %d = %f'
                  % (rank + 1, topK_correct_num[rank] / total_num))

print('Final recall on the whole test set')
for rank in [0, 10 - 1]:
    print('\trecall @ %d = %f' % (rank + 1, topK_correct_num[rank] / total_num))
################################################################################
# Build the shuffled list of training pairs: one tuple per (crop, description)
# of (imcrop_name, description, bbox spatial feature, image name,
# whole-image context feature, cropped-region local feature).
imset = set(util.io.load_str_list(trn_imlist_file))
vocab_dict = retriever.build_vocab_dict_from_file(vocab_file)
query_dict = util.io.load_json(query_file)
imsize_dict = util.io.load_json(imsize_dict_file)
imcrop_bbox_dict = util.io.load_json(imcrop_bbox_dict_file)

train_pairs = []
for imcrop_name, des in query_dict.iteritems():  # Python 2 dict iteration (file convention)
    imname = imcrop_name.split('_', 1)[0]
    if imname not in imset:
        continue
    imsize = np.array(imsize_dict[imname])
    bbox = np.array(imcrop_bbox_dict[imcrop_name])
    # spatial info
    bbox_feat = retriever.compute_spatial_feat(bbox, imsize)
    context_feature = np.load(cached_context_features_dir + imname + '_fc7.npy')
    # BUG FIX: was assigned to the misspelled name 'local_feaure', so the
    # comprehension below read an undefined (or stale) 'local_feature'.
    # NOTE(review): the sibling chunk uses 'cached_local_features_dir'
    # (with a 'd') — confirm which global is actually defined.
    local_feature = np.load(cache_local_features_dir + imcrop_name + '.png_fc7.npy')
    train_pairs += [(imcrop_name, d, bbox_feat, imname, context_feature,
                     local_feature) for d in des]

# random shuffle training pairs (fixed seed for reproducibility)
np.random.seed(3)
perm_idx = np.random.permutation(np.arange(len(train_pairs)))
train_pairs = [train_pairs[n] for n in perm_idx]

num_train_pairs = len(train_pairs)
# Truncate so the pair list divides evenly into batches of N_batch.
num_train_pairs = num_train_pairs - num_train_pairs % N_batch
train_pairs = train_pairs[:num_train_pairs]
num_batch = int(num_train_pairs // N_batch)
imcrop_list = []
K = 100 # evaluate recall at 1, 2, ..., K topK_correct_num = np.zeros(K, dtype=np.float32) total_num = 0 for n_im in range(num_im): print('testing image %d / %d' % (n_im, num_im)) imname = imlist[n_im] imcrop_names = imcrop_dict[imname] candidate_boxes = candidate_boxes_dict[imname] im = skimage.io.imread(image_dir + imname + '.jpg') imsize = np.array([im.shape[1], im.shape[0]]) # [width, height] # Compute local descriptors (local image feature + spatial feature) descriptors = retriever.compute_descriptors_edgebox(captioner, im, candidate_boxes) spatial_feats = retriever.compute_spatial_feat(candidate_boxes, imsize) descriptors = np.concatenate((descriptors, spatial_feats), axis=1) num_imcrop = len(imcrop_names) num_proposal = candidate_boxes.shape[0] for n_imcrop in range(num_imcrop): imcrop_name = imcrop_names[n_imcrop] if imcrop_name not in query_dict: continue gt_bbox = np.array(imcrop_bbox_dict[imcrop_name]) IoUs = retriever.compute_iou(candidate_boxes, gt_bbox) for n_sentence in range(len(query_dict[imcrop_name])): sentence = query_dict[imcrop_name][n_sentence] # Scores for each candidate region if use_context: scores = retriever.score_descriptors_context(descriptors, sentence,
# Build the shuffled list of training pairs: one tuple per (crop, description)
# of (imcrop_name, description, bbox spatial feature, image name,
# whole-image context feature, cropped-region local feature).
imset = set(util.io.load_str_list(trn_imlist_file))
vocab_dict = retriever.build_vocab_dict_from_file(vocab_file)
query_dict = util.io.load_json(query_file)
imsize_dict = util.io.load_json(imsize_dict_file)
imcrop_bbox_dict = util.io.load_json(imcrop_bbox_dict_file)
train_pairs = []
for imcrop_name, des in query_dict.iteritems():  # Python 2 dict iteration (file convention)
    imname = imcrop_name.split('_', 1)[0]
    # Only keep crops whose parent image is in the training split.
    if imname not in imset:
        continue
    imsize = np.array(imsize_dict[imname])
    bbox = np.array(imcrop_bbox_dict[imcrop_name])
    # spatial info
    bbox_feat = retriever.compute_spatial_feat(bbox, imsize)
    # NOTE(review): when either cached .npy file is missing, the corresponding
    # variable silently keeps its value from a previous iteration (or raises
    # NameError on the very first miss) — confirm this reuse is intended
    # rather than skipping the pair.
    if os.path.isfile(cached_context_features_dir + imname + '_fc7.npy'):
        context_feature = np.load(cached_context_features_dir + imname + '_fc7.npy')
    if os.path.isfile(cached_local_features_dir + imcrop_name + '.png_fc7.npy'):
        local_feature = np.load(cached_local_features_dir + imcrop_name + '.png_fc7.npy')
    train_pairs += [(imcrop_name, d, bbox_feat, imname, context_feature,
                     local_feature) for d in des]
# random shuffle training pairs (fixed seed for reproducibility)
np.random.seed(3)
perm_idx = np.random.permutation(np.arange(len(train_pairs)))
train_pairs = [train_pairs[n] for n in perm_idx]
num_train_pairs = len(train_pairs)
# Truncate so the pair list divides evenly into batches of N_batch.
num_train_pairs = num_train_pairs - num_train_pairs % N_batch
train_pairs = train_pairs[:num_train_pairs]
num_batch = int(num_train_pairs // N_batch)