# Dependencies assumed by the snippets below (Python 2 code: print statements
# and xrange). `cu` refers to the toolbox's common-utils module, which
# provides iou_bboxes() and draw_bbox(); the exact module name is inferred
# from usage, not confirmed by the source.
import os
import json
import random

import matplotlib.pyplot as plt
import numpy as np
from scipy import misc

import common_utils as cu  # assumed module name
from common_utils import iou_bboxes  # get_streams() calls it unqualified


def visualize_top_predicted_bbox(self, pred_sample, coco_image_dir):
  """Visualize the top predicted bounding box."""
  assert 'annotation_id' in pred_sample, 'Object annotation id missing!'
  assert 'predicted_bounding_boxes' in pred_sample, \
      'list of predicted bounding boxes missing!'
  if not pred_sample['predicted_bounding_boxes']:
    print 'Empty predicted bounding boxes.'
    return

  bbox_pred_top = pred_sample['predicted_bounding_boxes'][0]
  ann_id = pred_sample['annotation_id']
  ann = self.refexp_dataset.loadAnns(ids=[ann_id])[0]
  image_id = ann['image_id']
  img_coco = self.refexp_dataset.loadImgs(ids=[image_id])[0]
  iou = cu.iou_bboxes(bbox_pred_top, ann['bbox'])

  if 'refexp' in pred_sample or 'refexp_id' in pred_sample:
    print 'The referring expression input to the model is:'
    if 'refexp' in pred_sample:
      print '  ' + pred_sample['refexp']
    else:
      refexp_tmp = self.refexp_dataset.loadRefexps(
          ids=pred_sample['refexp_id'])[0]
      print '  ' + refexp_tmp['raw']

  I = misc.imread(os.path.join(coco_image_dir, img_coco['file_name']))
  plt.imshow(I)
  plt.axis('off')
  plt.title('IoU: %.3f, green bbox: GT, red bbox: predicted' % iou)
  cu.draw_bbox(plt.gca(), ann['bbox'], edge_color='green')
  cu.draw_bbox(plt.gca(), bbox_pred_top, edge_color='red')
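# Both visualize_top_predicted_bbox() above and evaluate() below score boxes
# with cu.iou_bboxes() on COCO-style [x, y, w, h] boxes. A minimal sketch of
# the computation such a helper is assumed to perform (the toolbox's own
# implementation may differ):
def iou_bboxes_sketch(bbox_a, bbox_b):
  """IoU of two COCO-style [x, y, w, h] bounding boxes."""
  x1 = max(bbox_a[0], bbox_b[0])
  y1 = max(bbox_a[1], bbox_b[1])
  x2 = min(bbox_a[0] + bbox_a[2], bbox_b[0] + bbox_b[2])
  y2 = min(bbox_a[1] + bbox_a[3], bbox_b[1] + bbox_b[3])
  intersection = max(0.0, x2 - x1) * max(0.0, y2 - y1)
  union = bbox_a[2] * bbox_a[3] + bbox_b[2] * bbox_b[3] - intersection
  return intersection / float(union) if union > 0 else 0.0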
def get_streams(self):
  ((image_filename, image_id), object_id_list,
   line) = self.image_refexp_pairs[self.index]
  if image_id in self.dataset.imgs_with_errors:
    line = EOS_IDENTIFIER

  stream = get_encoded_line(line, self.vocabulary)
  # Assumes stream has an EOS word at the end.
  assert stream[-1] == self.vocabulary[EOS_IDENTIFIER]
  stream = stream[:-1]
  filtered_stream = []
  for word in stream:
    if word != self.vocabulary[UNK_IDENTIFIER]:
      filtered_stream.append(word)
  stream = filtered_stream
  if self.truncate and len(stream) >= self.max_words:
    stream = stream[:self.max_words - 1]
    self.num_truncates += 1

  object_id = object_id_list[1]
  object_ann = self.dataset.loadAnns(object_id)[0]
  object_category = self.dataset.loadCats(
      object_ann['category_id'])[0]['name']
  object_bbox = self.dataset.loadAnns(object_id)[0]['bbox']

  # Split the image's other annotations into same- and different-category
  # context regions.
  context_anns_of_same_category = []
  context_anns_of_diff_category = []
  if hasattr(self.dataset, 'coco'):
    all_anns = self.dataset.coco.imgToAnns[image_id]
  else:
    all_anns = self.dataset.imgToAnns[image_id]
  for ann in all_anns:
    if ann['id'] != object_id:
      if ann['category_id'] == object_ann['category_id']:
        context_anns_of_same_category.append(ann)
      else:
        context_anns_of_diff_category.append(ann)

  neg_anns_of_same_category = []
  neg_anns_of_diff_category = []
  if self.neg_proposal_source != 'gt':
    image_info = self.dataset.loadImgs(image_id)[0]
    all_anns = image_info['region_candidates']
    for ann in all_anns:
      ann['bbox'] = ann['bounding_box']
      ann_box = ann['bbox']
      iou = iou_bboxes(ann_box, object_bbox)
      if iou < 0.5 and ann['predicted_object_name'] == object_category:
        neg_anns_of_same_category.append(ann)
      elif ann['predicted_object_name'] != object_category:
        neg_anns_of_diff_category.append(ann)
  else:
    # NB: these alias the context lists, so the in-place `+=` extensions
    # below can grow both the negative and the context lists.
    neg_anns_of_same_category = context_anns_of_same_category
    neg_anns_of_diff_category = context_anns_of_diff_category

  # Subtract one because the full image is reserved as one context region.
  if len(context_anns_of_same_category) > self.max_num_context - 1:
    rand_sample = sorted(random.sample(
        range(len(context_anns_of_same_category)), self.max_num_context - 1))
    context_anns_of_same_category = [context_anns_of_same_category[idx]
                                     for idx in rand_sample]
  elif len(context_anns_of_same_category) < self.max_num_context - 1:
    rand_sample = sorted(random.sample(
        range(len(context_anns_of_diff_category)),
        min(self.max_num_context - 1 - len(context_anns_of_same_category),
            len(context_anns_of_diff_category))))
    context_anns_of_same_category += [context_anns_of_diff_category[idx]
                                      for idx in rand_sample]

  if len(neg_anns_of_same_category) > self.max_num_negatives:
    rand_sample = sorted(random.sample(
        range(len(neg_anns_of_same_category)), self.max_num_negatives))
    neg_anns_of_same_category = [neg_anns_of_same_category[idx]
                                 for idx in rand_sample]
  elif len(neg_anns_of_same_category) < self.max_num_negatives:
    rand_sample = sorted(random.sample(
        range(len(neg_anns_of_diff_category)),
        min(self.max_num_negatives - len(neg_anns_of_same_category),
            len(neg_anns_of_diff_category))))
    neg_anns_of_same_category += [neg_anns_of_diff_category[idx]
                                  for idx in rand_sample]

  # If we are running short of proposal negatives, sample from GT negatives.
  if (len(neg_anns_of_same_category) < self.max_num_negatives
      and self.neg_proposal_source != 'gt'):
    rand_sample = sorted(random.sample(
        range(len(context_anns_of_diff_category)),
        min(self.max_num_negatives - len(neg_anns_of_same_category),
            len(context_anns_of_diff_category))))
    neg_anns_of_same_category += [context_anns_of_diff_category[idx]
                                  for idx in rand_sample]

  pad = (self.max_words - (len(stream) + 1)) if self.pad else 0
  if pad > 0:
    self.num_pads += 1

  out = {}
  timestep_input = np.asarray(
      [[self.vocabulary[EOS_IDENTIFIER]] + stream + [-1] * pad], np.float16)
  out['timestep_input'] = np.tile(timestep_input.T, (1, self.max_num_context))
  timestep_cont = np.asarray(
      [[0] + [1] * len(stream) + [0] * pad], np.float16)
  out['timestep_cont'] = np.tile(timestep_cont.T, (1, self.max_num_context))
  timestep_target = np.asarray(
      stream + [self.vocabulary[EOS_IDENTIFIER]] + [-1] * pad, np.float16)
  out['timestep_target'] = timestep_target
  self.swap_axis_streams.add('timestep_input')
  self.swap_axis_streams.add('timestep_target')
  self.swap_axis_streams.add('timestep_cont')

  # Write image features to batch.
  img_info = self.dataset.loadImgs(image_id)[0]
  img_wd = float(img_info['width'])
  img_ht = float(img_info['height'])
  assert len(object_id_list) <= 2
  fc7_img = self.dataset.image_features[
      str((image_id, [0, 0, int(img_wd - 1), int(img_ht - 1)]))][0]
  out['fc7_img'] = np.tile(fc7_img, (self.max_num_context, 1))
  img_bbox_features = np.zeros((self.max_num_context, 5), np.float16)
  img_bbox_features[:] = [0, 0, 1, 1, 1]
  out['img_bbox_features'] = img_bbox_features

  # Write object region features to batch.
  object_bbox = self.dataset.loadAnns(object_id)[0]['bbox']
  fc7_obj = self.dataset.image_features[str((image_id, object_bbox))][0]
  out['fc7_obj'] = np.tile(fc7_obj, (self.max_num_context, 1))
  bbox_area_ratio = (object_bbox[2] * object_bbox[3]) / (img_wd * img_ht)
  bbox_x1y1x2y2 = [object_bbox[0] / img_wd, object_bbox[1] / img_ht,
                   (object_bbox[0] + object_bbox[2]) / img_wd,
                   (object_bbox[1] + object_bbox[3]) / img_ht]
  bbox_features = bbox_x1y1x2y2 + [bbox_area_ratio]
  out['bbox_features'] = np.tile(bbox_features, (self.max_num_context, 1))

  # Write context features to batch; context slots default to the full image.
  context_fc7 = np.tile(fc7_img, (self.max_num_context, 1))
  context_bbox_features = np.zeros((self.max_num_context, 5), np.float16)
  context_bbox_features[:] = [0, 0, 1, 1, 1]
  if len(context_anns_of_same_category) > 0:
    other_bboxes = [ann['bbox'] for ann in context_anns_of_same_category]
    for idx, other_bbox in enumerate(other_bboxes):
      other_bbox_area_ratio = (
          other_bbox[2] * other_bbox[3]) / (img_wd * img_ht)
      other_bbox_x1y1x2y2 = [other_bbox[0] / img_wd, other_bbox[1] / img_ht,
                             (other_bbox[0] + other_bbox[2]) / img_wd,
                             (other_bbox[1] + other_bbox[3]) / img_ht]
      other_bbox_features = other_bbox_x1y1x2y2 + [other_bbox_area_ratio]
      context_fc7[idx, :] = self.dataset.image_features[
          str((image_id, other_bbox))][0]
      context_bbox_features[idx, :] = other_bbox_features
  out['context_fc7'] = context_fc7
  out['context_bbox_features'] = context_bbox_features

  # Write negative features to batch.
  negative_fc7 = np.zeros(
      (self.max_num_negatives, self.dataset.image_feature_length), np.float16)
  negative_bbox_features = np.zeros((self.max_num_negatives, 5), np.float16)
  if len(neg_anns_of_same_category) > 0:
    other_bboxes = [ann['bbox'] for ann in neg_anns_of_same_category]
    for idx, other_bbox in enumerate(other_bboxes):
      other_bbox_area_ratio = (
          other_bbox[2] * other_bbox[3]) / (img_wd * img_ht)
      other_bbox_x1y1x2y2 = [other_bbox[0] / img_wd, other_bbox[1] / img_ht,
                             (other_bbox[0] + other_bbox[2]) / img_wd,
                             (other_bbox[1] + other_bbox[3]) / img_ht]
      other_bbox_features = other_bbox_x1y1x2y2 + [other_bbox_area_ratio]
      negative_fc7[idx, :] = self.dataset.image_features[
          str((image_id, other_bbox))][0]
      negative_bbox_features[idx, :] = other_bbox_features
  out['negative_fc7'] = negative_fc7
  out['negative_bbox_features'] = negative_bbox_features

  pairwise_similarity = np.asarray(
      [[0] * self.max_num_negatives], np.float16)
  out['pairwise_similarity'] = np.tile(pairwise_similarity,
                                       (self.max_words, 1))
  self.swap_axis_streams.add('pairwise_similarity')

  self.num_outs += 1
  self.next_line()
  return out
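# get_streams() above computes the same five-dimensional geometry feature
# three times (for the object, each context region, and each negative region).
# A minimal sketch of that computation factored into a helper; the function
# name is illustrative and not part of the original data layer:
def bbox_geometry_features(bbox, img_wd, img_ht):
  """Return [x1/W, y1/H, x2/W, y2/H, area_ratio] for a [x, y, w, h] bbox."""
  img_wd, img_ht = float(img_wd), float(img_ht)
  area_ratio = (bbox[2] * bbox[3]) / (img_wd * img_ht)
  return [bbox[0] / img_wd,
          bbox[1] / img_ht,
          (bbox[0] + bbox[2]) / img_wd,
          (bbox[1] + bbox[3]) / img_ht,
          area_ratio]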
def evaluate(self, pred_results_path,
             thresh_iou=0.5,
             thresh_k=1,
             flag_ignore_non_existed_object=False,
             flag_ignore_non_existed_gt_refexp=False,
             flag_missing_objects_verbose=False,
             flag_missing_refexps_verbose=False):
  """Evaluate the predicted results for the comprehension task.

  Args:
    pred_results_path: path of the predicted results, in the format described
        in ./cache_evaluation/format_comprehension_eval.md
    thresh_iou: IoU threshold for a predicted box to count as correct.
    thresh_k: compute precision@k.
    flag_ignore_non_existed_object: if True, the evaluation continues with a
        warning when it encounters objects that do not exist in
        self.refexp_dataset; otherwise it stops.
    flag_ignore_non_existed_gt_refexp: if True, the evaluation continues when
        it encounters GT referring expressions that do not exist in
        self.refexp_dataset; otherwise it stops.
    flag_missing_objects_verbose: if True, list the ids of all objects
        missing from the predicted file.
    flag_missing_refexps_verbose: if True, list the ids of all referring
        expressions missing from the predicted file.

  Returns:
    A two-element tuple. The first element is precision@k. The second element
    is the predicted results (a list of dictionaries) with an added field
    'best_iou': the best IoU among the top-k predicted bounding boxes.
  """
  # Load predicted results.
  self.reset_eval_state()
  print 'Loading predicted result file for the comprehension task.'
  with open(pred_results_path) as fin:
    self.pred_results = json.load(fin)

  # Evaluation.
  pred_ann_ids_set = set()
  pred_refexp_ids_set = set()
  score = 0.0
  num_valid_pred = 0
  for pred_elem in self.pred_results:
    # Validate the predicted results.
    assert 'annotation_id' in pred_elem, 'Object annotation id missing!'
    assert 'predicted_bounding_boxes' in pred_elem, \
        'list of predicted bounding boxes missing!'
    ann_id = pred_elem['annotation_id']
    gt_bbox = self._get_GT_bbox_with_annotation_id(ann_id)  # TODO: verify.
    if gt_bbox is None:
      if flag_ignore_non_existed_object:
        print ('Ignore COCO annotation id %d which does not exist in '
               'Refexp dataset file for evaluation' % ann_id)
        pred_elem['best_iou'] = 0.0
        continue
      else:
        raise ValueError('COCO annotation id %d does not exist in Refexp '
                         'dataset file for evaluation!' % ann_id)
    if ('refexp_id' in pred_elem
        and pred_elem['refexp_id'] not in self.gt_refexp_ids_set):
      if flag_ignore_non_existed_gt_refexp:
        print ('Ignore refexp id %d which does not exist in '
               'Refexp dataset file for evaluation' % pred_elem['refexp_id'])
        pred_elem['best_iou'] = 0.0
        continue
      else:
        raise ValueError('refexp id %d does not exist in Refexp dataset '
                         'file for evaluation!' % pred_elem['refexp_id'])
    pred_ann_ids_set.add(ann_id)
    if 'refexp_id' in pred_elem:
      pred_refexp_ids_set.add(pred_elem['refexp_id'])
    num_valid_pred += 1

    # A prediction is correct if the best IoU among its top-k boxes clears
    # the threshold.
    pred_bboxes = pred_elem['predicted_bounding_boxes']
    best_iou = 0.0
    for k in xrange(min(thresh_k, len(pred_bboxes))):
      iou = cu.iou_bboxes(pred_bboxes[k], gt_bbox)
      best_iou = max(best_iou, iou)
    if best_iou >= thresh_iou:
      score += 1.0
    pred_elem['best_iou'] = best_iou
  assert num_valid_pred > 0, 'No valid predictions to evaluate.'
  score /= num_valid_pred

  # Warn about objects and refexps missing from the predicted file.
  gt_ann_ids_left_set = self.gt_ann_ids_set - pred_ann_ids_set
  gt_refexp_ids_left_set = self.gt_refexp_ids_set - pred_refexp_ids_set
  if gt_ann_ids_left_set:
    print ('Missing %d objects in the refexp dataset file in the predicted '
           'file' % len(gt_ann_ids_left_set))
    if flag_missing_objects_verbose:
      print 'The missing object annotation ids are:'
      print gt_ann_ids_left_set  # TODO: pretty-print format
  if gt_refexp_ids_left_set:
    print ('Missing %d refexps in the refexp dataset file in the predicted '
           'file' % len(gt_refexp_ids_left_set))
    if flag_missing_refexps_verbose:
      print 'The missing refexp ids are:'
      print gt_refexp_ids_left_set  # TODO: pretty-print format

  # Summarize the results.
  print 'The average prec@%d score is %.3f' % (thresh_k, score)
  return (score, self.pred_results)
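# A hedged usage sketch for evaluate(). In the Google Refexp toolbox this
# method lives on the comprehension evaluator, which is constructed from a
# Refexp dataset JSON and the COCO instances JSON; the class name,
# constructor arguments, and file names below are illustrative assumptions.
#
#   evaluator = RefexpEvalComprehension('google_refexp_val.json',
#                                       'coco_instances.json')
#   prec_at_k, pred_results = evaluator.evaluate(
#       'my_comprehension_predictions.json', thresh_iou=0.5, thresh_k=1)
#   # prec_at_k is precision@k; each element of pred_results gains 'best_iou'.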
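# get_streams() above also depends on get_encoded_line() and the EOS/UNK
# sentinels from the surrounding data-layer module. A minimal sketch, assuming
# whitespace tokenization, a dict vocabulary mapping token strings to integer
# ids, and '<EOS>'/'<unk>' as the sentinel spellings (all assumptions; the
# actual tokenizer may differ):
EOS_IDENTIFIER = '<EOS>'  # assumed sentinel spelling
UNK_IDENTIFIER = '<unk>'  # assumed sentinel spelling

def get_encoded_line_sketch(line, vocabulary):
  """Encode a line into vocabulary ids, ending with the EOS id."""
  encoded = [vocabulary.get(word, vocabulary[UNK_IDENTIFIER])
             for word in line.strip().split()]
  encoded.append(vocabulary[EOS_IDENTIFIER])
  return encoded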