def test_max_distance_for_overlap(self):
    """Test that the returned distance guarantees the minimum IoU for random boxes."""

    # TODO(vighneshb) remove this after the `_smallest_positive_root`
    # function is fixed.
    self.skipTest(('Skipping test because we are using an incorrect version of'
                   ' the `max_distance_for_overlap` function to reproduce'
                   ' results.'))

    rng = np.random.RandomState(0)
    n_samples = 100

    width = rng.uniform(1, 100, size=n_samples)
    height = rng.uniform(1, 100, size=n_samples)
    min_iou = rng.uniform(0.1, 1.0, size=n_samples)

    max_dist = target_assigner.max_distance_for_overlap(height, width, min_iou)

    xmin1 = np.zeros(n_samples)
    ymin1 = np.zeros(n_samples)
    xmax1 = np.zeros(n_samples) + width
    ymax1 = np.zeros(n_samples) + height

    xmin2 = max_dist * np.cos(rng.uniform(0, 2 * np.pi))
    ymin2 = max_dist * np.sin(rng.uniform(0, 2 * np.pi))
    xmax2 = width + max_dist * np.cos(rng.uniform(0, 2 * np.pi))
    ymax2 = height + max_dist * np.sin(rng.uniform(0, 2 * np.pi))

    boxes1 = np.vstack([ymin1, xmin1, ymax1, xmax1]).T
    boxes2 = np.vstack([ymin2, xmin2, ymax2, xmax2]).T

    iou = np.diag(np_box_ops.iou(boxes1, boxes2))

    self.assertTrue(np.all(iou >= min_iou))
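# Usage sketch for max_distance_for_overlap as exercised by the (skipped) test
# above: ideally, shifting a box by at most max_dist keeps IoU >= min_iou.
# The import paths below follow the object_detection package layout and are an
# assumption; np.asarray is used in case the function returns a TF tensor.
import numpy as np
from object_detection.core import target_assigner
from object_detection.utils import np_box_ops

height = np.array([10.0])
width = np.array([20.0])
min_iou = np.array([0.7])
max_dist = np.asarray(
    target_assigner.max_distance_for_overlap(height, width, min_iou))

box = np.array([[0.0, 0.0, height[0], width[0]]])  # [ymin, xmin, ymax, xmax]
shifted = box + np.array([[max_dist[0], 0.0, max_dist[0], 0.0]])
print(np_box_ops.iou(box, shifted)[0, 0], min_iou[0])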
def iou(boxlist1, boxlist2):
    """Computes pairwise intersection-over-union between box collections.

    Args:
      boxlist1: BoxList holding N boxes
      boxlist2: BoxList holding M boxes

    Returns:
      a numpy array with shape [N, M] representing pairwise iou scores.
    """
    return np_box_ops.iou(boxlist1.get(), boxlist2.get())
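# Minimal usage sketch for the BoxList wrapper above, assuming the TF Object
# Detection API numpy utilities (np_box_list.BoxList) are importable; boxes are
# absolute [ymin, xmin, ymax, xmax] coordinates.
import numpy as np
from object_detection.utils import np_box_list

boxes_a = np_box_list.BoxList(
    np.array([[0.0, 0.0, 2.0, 2.0]], dtype=np.float32))
boxes_b = np_box_list.BoxList(
    np.array([[1.0, 1.0, 3.0, 3.0],
              [5.0, 5.0, 6.0, 6.0]], dtype=np.float32))
print(iou(boxes_a, boxes_b))  # shape [1, 2]: first entry 1/7, second 0.0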
def filter_annotations(img_all_annotations, used_classes):
    """Filters out annotations from the unused classes and dontcare regions.

    Filters out the annotations that belong to classes we do not wish to use and
    (optionally) also removes all boxes that overlap with dontcare regions.

    Args:
      img_all_annotations: A list of annotation dictionaries. See documentation of
        read_annotation_file for more details about the format of the annotations.
      used_classes: A list of strings listing the classes we want to keep. If the
        list contains "dontcare", all bounding boxes overlapping with dontcare
        regions will also be filtered out.

    Returns:
      img_filtered_annotations: A list of annotation dictionaries that have passed
        the filtering.
    """
    img_filtered_annotations = {}

    # Filter the type of the objects.
    relevant_annotation_indices = [
        i for i, x in enumerate(img_all_annotations['type']) if x in used_classes
    ]

    for key in img_all_annotations.keys():
        img_filtered_annotations[key] = (
            img_all_annotations[key][relevant_annotation_indices])

    if 'dontcare' in used_classes:
        dont_care_indices = [
            i for i, x in enumerate(img_filtered_annotations['type'])
            if x == 'dontcare'
        ]

        # bounding box format [y_min, x_min, y_max, x_max]
        all_boxes = np.stack([
            img_filtered_annotations['2d_bbox_top'],
            img_filtered_annotations['2d_bbox_left'],
            img_filtered_annotations['2d_bbox_bottom'],
            img_filtered_annotations['2d_bbox_right']
        ], axis=1)

        ious = iou(boxes1=all_boxes, boxes2=all_boxes[dont_care_indices])

        # Remove all bounding boxes that overlap with a dontcare region.
        if ious.size > 0:
            boxes_to_remove = np.amax(ious, axis=1) > 0.0
            for key in img_all_annotations.keys():
                img_filtered_annotations[key] = (
                    img_filtered_annotations[key][np.logical_not(boxes_to_remove)])

    return img_filtered_annotations
def evaluate_class(argmin, id2node, node2id, gt_ds, cls, thresh=0.5):
    total = 0
    correct = 0
    for i, imgname in enumerate(gt_ds['images']):
        roi = gt_ds['rois'][i]
        if cls in roi['classes']:
            total += 1
            boxes = roi['boxes'][roi['classes'] == cls]
            node_id = node2id[imgname]
            argmin_box = roi['fea_boxes'][argmin[node_id]][None]
            if iou(argmin_box, boxes).max() > thresh:
                correct += 1
    print("Class: {}, Total: {}, correct: {}, CorLoc: {}".format(
        cls, total, correct, float(correct) / total))
    return float(correct) / total
def main(_):
    ds = read_pickle(
        os.path.join(FLAGS.ds_root, 'ImageSet', FLAGS.split + '.pkl'))
    name2class2feas = defaultdict(dict)
    class2names = defaultdict(set)
    for i, img in enumerate(ds['images']):
        if (i + 1) % 1000 == 0:
            print("Processing image {}/{}".format(i + 1, len(ds['images'])))
        if FLAGS.eval_fold is not None and ds['folds'][i] != FLAGS.eval_fold:
            continue
        roi = ds['rois'][i]
        fea_path = ds['det_feas'][i]
        fea = np.load(os.path.join(FLAGS.ds_root, 'Feas', fea_path))
        if FLAGS.doublefeas:
            fea_path = ds['feas'][i]
            alexfea = np.load(os.path.join(FLAGS.ds_root, 'Feas', fea_path))
            fea = np.concatenate((alexfea, fea), axis=3)
        assert np.all(np.unique(roi['classes']) == roi['classes'])
        # TODO: last is removed because it is zero
        fea_indices = np.argmax(iou(roi['boxes'], roi['fea_boxes'][:-1]), axis=1)
        for j, cls in enumerate(roi['classes']):
            class2names[cls].add(img)
            name2class2feas[img][cls] = fea[fea_indices[j]]
            if FLAGS.random:
                name2class2feas[img][cls] = fea[np.random.choice(
                    np.arange(fea.shape[0]))]
            if FLAGS.objectness:
                name2class2feas[img][cls] = fea[0]

    save_dict = {
        'name2class2feas': name2class2feas,
        'class2names': class2names
    }
    write_pickle(
        save_dict,
        os.path.join(FLAGS.ds_root, 'ImageSet', FLAGS.save_name + '.pkl'))
def filter_annotations(img_all_annotations, used_classes):
    img_filtered_annotations = {}

    # Filter the type of the objects.
    relevant_annotation_indices = [
        i for i, x in enumerate(img_all_annotations['type']) if x in used_classes
    ]
    # print('type(relevant_annotation_indices)', type(relevant_annotation_indices))
    # print(relevant_annotation_indices)

    for key in img_all_annotations.keys():
        img_filtered_annotations[key] = (
            img_all_annotations[key][relevant_annotation_indices])

    if 'dontcare' in used_classes:
        dont_care_indices = [
            i for i, x in enumerate(img_filtered_annotations['type'])
            if x == 'dontcare'
        ]

        # bounding box format [y_min, x_min, y_max, x_max]
        all_boxes = np.stack([
            img_filtered_annotations['2d_bbox_top'],
            img_filtered_annotations['2d_bbox_left'],
            img_filtered_annotations['2d_bbox_bottom'],
            img_filtered_annotations['2d_bbox_right']
        ], axis=1)

        ious = iou(boxes1=all_boxes, boxes2=all_boxes[dont_care_indices])

        # Remove all bounding boxes that overlap with a dontcare region.
        if ious.size > 0:
            boxes_to_remove = np.amax(ious, axis=1) > 0.0
            for key in img_all_annotations.keys():
                img_filtered_annotations[key] = (
                    img_filtered_annotations[key][np.logical_not(boxes_to_remove)])

    return img_filtered_annotations
def DEP_calc_iou_with_previous(region_id, bbox_stack_list, bbox_push_list,
                               bbox_array):
    '''
    use IOU algorithm to compare current with previous
    stack is all bboxes from previous inferences (where something was detected)
    size = DEDUP_DEPTH

    SIMPLE - 1 object detected
    consider:
    b1 = np.array([[0.10, 0.20, 0.30, 0.40],
                   [0.12, 0.22, 0.32, 0.42],
                   [0.08, 0.18, 0.28, 0.38],
                   [0.4, 0.6, 0.4, 0.6],
                   [0.10, 0.2, 0.3, 0.4]])
    b2 = np.array([[0.10, 0.20, 0.30, 0.40]])
    # iou = [[1], [0.68], [0.68], [0.], [1]]   average = 0.67
    match_rates = iou(b1, b2).reshape(-1,)
    matches = np.argwhere(match_rates > 0.8).size

    COMPLEX - 2 objects detected (history = 2 objects, 1 object, 2 objects)
    b1 = np.array([[0.1, 0.11, 0.2, 0.22],
                   [0.3, 0.33, 0.4, 0.44],
                   [0.1, 0.11, 0.2, 0.22],
                   [0.1, 0.11, 0.2, 0.22],
                   [0.3, 0.33, 0.4, 0.44]])
    b2 = [[0.1, 0.11, 0.2, 0.22],
          [0.3, 0.33, 0.4, 0.44]]
    match_rates = iou(b1, b2)
    np.count_nonzero(match_rates[:, 0] > 0.8)
    np.count_nonzero(match_rates[:, 1] > 0.8)

    returns = IOU
    '''
    # get the bbox_stack
    bbox_stack = bbox_stack_list[region_id]
    bbox = bbox_array.reshape(-1, 4)

    # print ("\n\n ---- BEFORE ----")
    # print ("Stack Before:", bbox_stack)
    # print ("bbox:", bbox)
    # print ("push list BEFORE:", bbox_push_list[region_id])

    match_rates = iou(bbox_stack, bbox)
    det_obj_count = bbox.shape[0]

    # for the number of detected objects (1 object = 1 bbox)
    # how many matches?
    match_counts = []
    for i in range(det_obj_count):
        object_match_count = np.count_nonzero(
            match_rates[:, i] > bbox_iou_threshold)
        match_counts.append(object_match_count)

    # push match count to the bbox push list (so you know how many to pop off)
    # push list is a nested list - 1 list per region
    bbox_push_list[region_id].append(det_obj_count)   # push the object count into the list
    objects_to_remove = bbox_push_list[region_id][0]  # how many rows to remove from bbox_stack
    # zero based index so subtract 1
    bbox_push_list[region_id].pop(0)                  # pop the first

    # print ("----- AFTER -----")
    # print ("-- bbox_stack ", bbox_stack.shape)
    # print ("-- bbox ", bbox.shape)
    bbox_stack_list[region_id] = np.append(bbox_stack, bbox, 0)
    # print ("-- box_stack_list-appended", region_id, bbox_stack_list[region_id].shape)
    # print ("Stack BEFORE delete:", bbox_stack_list[region_id])
    # print ("Slice:", objects_to_remove)
    bbox_stack_list[region_id] = np.delete(bbox_stack_list[region_id],
                                           slice(0, objects_to_remove), 0)
    # print ("Match Counts:", match_counts)
    # print ("Stack After:", bbox_stack_list[region_id])
    # print ("Push List:", bbox_push_list[region_id])
    # print (" -- match counts:", match_counts)
    return match_counts
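# Standalone sketch of the matching step described in the docstring above; the
# `iou` helper used in this file is assumed to behave like np_box_ops.iou
# (pairwise IoU on [ymin, xmin, ymax, xmax] arrays).
import numpy as np
from object_detection.utils import np_box_ops

history = np.array([[0.10, 0.20, 0.30, 0.40],
                    [0.12, 0.22, 0.32, 0.42],
                    [0.08, 0.18, 0.28, 0.38],
                    [0.40, 0.60, 0.40, 0.60],
                    [0.10, 0.20, 0.30, 0.40]])
current = np.array([[0.10, 0.20, 0.30, 0.40]])

match_rates = np_box_ops.iou(history, current)        # shape [5, 1]
matches = np.count_nonzero(match_rates[:, 0] > 0.8)   # 2 history boxes match
print(matches)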
def compute_precision_recall_with_sequences(detection_file, db_file,
                                            detection_results=None,
                                            images_to_consider='all',
                                            get_night_day=None):

    if detection_results is None:
        print('Loading detection file...')
        with open(detection_file) as f:
            detection_results = pickle.load(f)

    im_to_seq = get_im_to_seq_map(db_file)

    seqs = {}
    for im in detection_results['images']:
        if im in im_to_seq:
            if im_to_seq[im] not in seqs:
                seqs[im_to_seq[im]] = []
            seqs[im_to_seq[im]].append(im)

    print('Clustering detections by image...')

    use_im = get_images_to_consider(detection_results, images_to_consider,
                                    get_night_day)

    per_image_detections, per_image_gts = cluster_detections_by_image(
        detection_results, use_im)

    per_image_eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes=1,
        matching_iou_threshold=0.5,
        nms_iou_threshold=1.0,
        nms_max_output_boxes=10000
    )

    print('Running per-image analysis...')

    detection_labels = []
    detection_scores = []
    num_total_gts = 0
    count = 0

    for seq in seqs:
        seq_detection_labels = []
        seq_detection_scores = []
        seq_num_gts = []
        is_gt_in_seq = False
        max_seq_scores = []
        valid_max_scores = []
        #print(seq)

        for image_id in seqs[seq]:
            #for image_id, dets in per_image_detections.iteritems():
            dets = per_image_detections[image_id]
            num_detections = len(dets['bboxes'])

            # [ymin, xmin, ymax, xmax] in absolute image coordinates.
            detected_boxes = np.zeros([num_detections, 4], dtype=np.float32)
            # detection scores for the boxes
            detected_scores = np.zeros([num_detections], dtype=np.float32)
            # 0-indexed detection classes for the boxes
            detected_class_labels = np.zeros([num_detections], dtype=np.int32)
            detected_masks = None

            count += 1
            if count % 1000 == 0:
                print(str(count) + ' images complete')

            for i in range(num_detections):
                x1, y1, x2, y2 = dets['bboxes'][i]
                detected_boxes[i] = np.array([y1, x1, y2, x2])
                detected_scores[i] = dets['scores'][i]
                detected_class_labels[i] = dets['labels'][i] - 1

            max_seq_scores.append(np.max(detected_scores))
            valid_max_scores.append(np.max(detected_scores))
            box_id = np.argmax(detected_scores)

            gts = per_image_gts[image_id]
            num_gts = len(gts['bboxes'])
            #seq_num_gts.append(num_gts)
            #print(num_gts)
            if num_gts > 0:
                seq_num_gts.append(1)
                is_gt_in_seq = True

                # [ymin, xmin, ymax, xmax] in absolute image coordinates
                groundtruth_boxes = np.zeros([num_gts, 4], dtype=np.float32)
                # 0-indexed groundtruth classes for the boxes
                groundtruth_class_labels = np.zeros(num_gts, dtype=np.int32)
                groundtruth_masks = None
                groundtruth_is_difficult_list = np.zeros(num_gts, dtype=bool)
                groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool)

                for i in range(num_gts):
                    x1, y1, x2, y2 = gts['bboxes'][i]
                    groundtruth_boxes[i] = np.array([y1, x1, y2, x2])
                    groundtruth_class_labels[i] = gts['labels'][i] - 1

                ious = np_box_ops.iou(detected_boxes, groundtruth_boxes)
                if np.max(ious[box_id, :]) < 0.5:
                    valid_max_scores[-1] = 0

                scores, tp_fp_labels, is_class_correctly_detected_in_image = (
                    per_image_eval.compute_object_detection_metrics(
                        detected_boxes=detected_boxes,
                        detected_scores=detected_scores,
                        detected_class_labels=detected_class_labels,
                        groundtruth_boxes=groundtruth_boxes,
                        groundtruth_class_labels=groundtruth_class_labels,
                        groundtruth_is_difficult_list=groundtruth_is_difficult_list,
                        groundtruth_is_group_of_list=groundtruth_is_group_of_list,
                        detected_masks=detected_masks,
                        groundtruth_masks=groundtruth_masks
                    )
                )

                seq_detection_labels.append(tp_fp_labels[0])
                seq_detection_scores.append(scores[0])
                #num_total_gts += 1
            else:
                seq_num_gts.append(0)
                seq_detection_labels.append(
                    np.zeros(num_detections, dtype=np.int32))
                seq_detection_scores.append(detected_scores)
                valid_max_scores[-1] = 0

        seq_detection_label = np.zeros(1, dtype=np.int32)
        seq_detection_score = np.zeros(1, dtype=np.float32)
        best_score = np.max(valid_max_scores)
        if best_score > 0:
            if not is_gt_in_seq:
                print(is_gt_in_seq)
                print('matched box with no gt')
                print(valid_max_scores)
            #print('valid box')
            best_im = np.argmax(max_seq_scores)
            #print(best_im, best_score)
            for i in range(len(seqs[seq])):
                temp_labels = np.zeros(len(seq_detection_labels[i]), dtype=np.int32)
                temp_scores = np.zeros(len(seq_detection_scores[i]), dtype=np.float32)
                for j in range(min(seq_num_gts[i], len(temp_labels))):
                    temp_labels[j] = True  # TODO: this currently only works for one class?
                    temp_scores[j] = best_score
                seq_detection_labels[i] = temp_labels
                seq_detection_scores[i] = temp_scores
            seq_detection_label[0] = True
            seq_detection_score[0] = best_score
        else:
            #print('no valid box')
            seq_detection_label[0] = False
            seq_detection_score[0] = np.max(max_seq_scores)

        #if sum(seq_num_gts) > 0:
        if is_gt_in_seq:
            num_total_gts += 1

        detection_labels.append(seq_detection_label)
        detection_scores.append(seq_detection_score)

    scores = np.concatenate(detection_scores)
    labels = np.concatenate(detection_labels).astype(bool)
    print(count)
    print(len(seqs.keys()))
    print(sum([1 for i in range(len(detection_labels))
               if detection_labels[i] == True]), num_total_gts)

    precision, recall = metrics.compute_precision_recall(
        scores, labels, num_total_gts
    )
    average_precision = metrics.compute_average_precision(precision, recall)

    return precision, recall, average_precision
def testIOU(self):
    iou = np_box_ops.iou(self.boxes1, self.boxes2)
    expected_iou = np.array(
        [[2.0 / 16.0, 0.0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]],
        dtype=float)
    self.assertAllClose(iou, expected_iou)
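# Worked example of the quantity checked above (the test's own fixtures are
# created in setUp and not shown here): IoU = intersection / union for boxes
# in [ymin, xmin, ymax, xmax] order.
import numpy as np
from object_detection.utils import np_box_ops

boxes1 = np.array([[0.0, 0.0, 4.0, 4.0]], dtype=float)  # area 16
boxes2 = np.array([[2.0, 2.0, 6.0, 6.0]], dtype=float)  # area 16, 2x2 overlap
# union = 16 + 16 - 4 = 28, so IoU = 4 / 28 ~= 0.143
print(np_box_ops.iou(boxes1, boxes2))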
def selectFnPerVideo(dataset, videos, active_set, detections, groundtruth_boxes, cycle):

    data_dir = '/home/javad/Projects/Audi_active_learning/tf/data/ILSVRC'
    score_thresh = 0.5
    iou_thresh = 0.5

    indices = []

    aug_active_set = augment_active_set(dataset, videos, active_set, num_neighbors=5)

    unlabeled_set = [
        f['idx'] for f in dataset
        if f['idx'] not in aug_active_set and f['verified']
    ]

    # We have detections only for the labeled dataset, be careful with indexing
    #unlabeled_set = [i for i in range(len(dataset)) if i not in active_set]

    BOXES = detections['boxes']
    SCORES = detections['scores']
    gt_boxes = groundtruth_boxes['boxes']

    stat_data = {}
    stat_data['videos_wo_FN'] = []
    stat_data['FN_info'] = []

    for v in videos:

        # Select frames in current video
        frames = [
            f['idx'] for f in dataset
            if f['video'] == v and f['idx'] in unlabeled_set
        ]

        # Get only those that are not labeled
        #frames = [f for f in frames if f in unlabeled_set]

        # If all frames of video are in active set, ignore video
        if len(frames) > 0:
            #print('frame = ', frames)
            j = 0
            FN = np.zeros((len(frames)))
            for f in frames:
                anno_ind = unlabeled_set.index(f)
                if gt_boxes[anno_ind].any():
                    # Extracting boxes with score greater than threshold
                    ind = SCORES[anno_ind] > score_thresh
                    boxes = np.array(BOXES[anno_ind])[ind, :]
                    if boxes.any():
                        # Compute IOU between gt and detected bbox
                        iou_mat = np_box_ops.iou(gt_boxes[anno_ind], boxes)
                        iou = iou_mat.max(axis=1)
                        # identify gt_boxes having low IOU with detected_boxes
                        low_iou = iou < iou_thresh
                        FN[j] = sum(low_iou)
                    else:
                        FN[j] = len(gt_boxes[anno_ind])
                j += 1

            if sum(FN) == 0:
                # case there is no False Negative in this video
                idx_sel = random.randint(0, len(FN) - 1)
                stat_data['videos_wo_FN'].append({'video': v})
            else:
                idx_max = np.where(FN == max(FN))
                idx_sel = np.random.choice(idx_max[0])

            indices.append(frames[idx_sel])
            print("Selecting frame {} from video {} with idx {}".format(
                idx_sel, v, frames[idx_sel]))
            #print(dataset[frames[idx_sel]]['filename'])
            #stat_data['FN_info'].append({'video':v,'frame':frames[idx_sel],'FN_loc':idx_sel,'video_length':len(frames)})

            #======================== visualization to check FPs ================================
            """
            for f in frames:
                for d in dataset:
                    if d['idx'] == f and d['video'] == v:
                        IndInDs = dataset.index(d)
                anno_ind = unlabeled_set.index(f)
                video_dir = os.path.join(data_dir, 'Data', 'VID', 'train', v)
                curr_im = Image.open(os.path.join(video_dir, dataset[IndInDs]['filename']))
                im_w, im_h = curr_im.size
                vis_utils.draw_bounding_boxes_on_image(curr_im, normalize_box(gt_boxes[anno_ind], im_w, im_h), color='green')
                # Extracting boxes with score greater than threshold
                ind = SCORES[anno_ind] > score_thresh
                boxes = np.array(BOXES[anno_ind])[ind, :]
                vis_utils.draw_bounding_boxes_on_image(curr_im, normalize_box(boxes, im_w, im_h))
                draw = ImageDraw.Draw(curr_im)
                curr_im.show()
                curr_im.save(data_dir+'/FN_samples'+'/FN_'+str(int(FN[frames.index(f)]))+'_'+dataset[IndInDs]['filename'])
            """
            #=====================================================================================

    #output_file = '/datatmp/Experiments/Javad/tf/data/ILSVRC/stat_data/FN_stat_data_cycle'+str(cycle)+'.json'
    #with open(output_file, 'w') as fn:
    #    json.dump(stat_data, fn)

    return indices
def compute_precision_recall_with_images(detection_file):

    print('Loading detection file...')
    with open(detection_file) as f:
        detection_results = pickle.load(f)

    print('Clustering detections by image...')

    # group the detections by image id:
    per_image_detections = {
        detection_results['images'][idx]: {
            'bboxes': detection_results['detections'][idx],
            'scores': detection_results['detection_scores'][idx],
            'labels': detection_results['detection_labels'][idx]
        } for idx in range(len(detection_results['images']))
    }

    # group the ground truth annotations by image id:
    per_image_gts = {
        detection_results['images'][idx]: {
            'bboxes': detection_results['gts'][idx],
            'labels': detection_results['gt_labels'][idx]
        } for idx in range(len(detection_results['images']))
    }

    per_image_eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes=1,
        matching_iou_threshold=0.5,
        nms_iou_threshold=1.0,
        nms_max_output_boxes=10000)

    print('Running per-image analysis...')

    detection_labels = []
    detection_scores = []
    num_total_gts = 0
    count = 0

    for image_id, dets in per_image_detections.items():
        im_detection_labels = []
        im_detection_scores = []
        im_num_gts = []
        max_im_scores = []

        num_detections = len(dets['bboxes'])

        # [ymin, xmin, ymax, xmax] in absolute image coordinates.
        detected_boxes = np.zeros([num_detections, 4], dtype=np.float32)
        # detection scores for the boxes
        detected_scores = np.zeros([num_detections], dtype=np.float32)
        # 0-indexed detection classes for the boxes
        detected_class_labels = np.zeros([num_detections], dtype=np.int32)
        detected_masks = None

        for i in range(num_detections):
            x1, y1, x2, y2 = dets['bboxes'][i]
            detected_boxes[i] = np.array([y1, x1, y2, x2])
            detected_scores[i] = dets['scores'][i]
            detected_class_labels[i] = dets['labels'][i] - 1

        max_im_scores.append(np.max(detected_scores))
        box_id = np.argmax(detected_scores)

        gts = per_image_gts[image_id]
        num_gts = len(gts['bboxes'])
        im_num_gts = num_gts

        if num_gts > 0:
            # [ymin, xmin, ymax, xmax] in absolute image coordinates
            groundtruth_boxes = np.zeros([num_gts, 4], dtype=np.float32)
            # 0-indexed groundtruth classes for the boxes
            groundtruth_class_labels = np.zeros(num_gts, dtype=np.int32)
            groundtruth_masks = None
            groundtruth_is_difficult_list = np.zeros(num_gts, dtype=bool)
            groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool)

            for i in range(num_gts):
                x1, y1, x2, y2 = gts['bboxes'][i]
                groundtruth_boxes[i] = np.array([y1, x1, y2, x2])
                groundtruth_class_labels[i] = gts['labels'][i] - 1

            ious = np_box_ops.iou(detected_boxes, groundtruth_boxes)
            if np.max(ious[box_id, :]) < 0.5:
                max_im_scores[-1] = 0

            #print('detected animal box')
            #print(groundtruth_boxes, groundtruth_class_labels, detected_scores[0], detected_boxes[0], detected_class_labels[0])

            scores, tp_fp_labels, is_class_correctly_detected_in_image = (
                per_image_eval.compute_object_detection_metrics(
                    detected_boxes=detected_boxes,
                    detected_scores=detected_scores,
                    detected_class_labels=detected_class_labels,
                    groundtruth_boxes=groundtruth_boxes,
                    groundtruth_class_labels=groundtruth_class_labels,
                    groundtruth_is_difficult_list=groundtruth_is_difficult_list,
                    groundtruth_is_group_of_list=groundtruth_is_group_of_list,
                    detected_masks=detected_masks,
                    groundtruth_masks=groundtruth_masks))
            #print(scores, tp_fp_labels)

            im_detection_labels = tp_fp_labels[0]
            im_detection_scores = scores[0]
            #num_total_gts += num_gts

            count += 1
            if count % 1000 == 0:
                print(str(count) + ' images complete')

            #if (tp_fp_labels[0].shape[0] != num_detections):
            #    print('Incorrect label length')
            #if scores[0].shape[0] != num_detections:
            #    print('Incorrect score length')
            #if tp_fp_labels[0].sum() > num_gts:
            #    print('Too many correct detections')
        else:
            im_detection_labels = np.zeros(num_detections, dtype=np.int32)
            im_detection_scores = detected_scores
            max_im_scores[-1] = 0

        best_score = np.max(max_im_scores)
        if best_score > 0:
            #print('valid box')
            best_im = np.argmax(max_im_scores)
            #print(best_im, best_score)
            temp_labels = np.zeros(len(im_detection_labels), dtype=np.int32)
            temp_scores = np.zeros(len(im_detection_scores), dtype=np.float32)
            for j in range(min(im_num_gts, len(im_detection_labels))):
                temp_labels[j] = True  # TODO: this currently only works for one class?
                temp_scores[j] = best_score
            im_detection_labels = temp_labels
            im_detection_scores = temp_scores

        num_total_gts += im_num_gts
        detection_labels.append(im_detection_labels)
        detection_scores.append(im_detection_scores)

    print(len(detection_scores), len(detection_scores[0]), len(detection_scores[1]))
    scores = np.concatenate(detection_scores)
    labels = np.concatenate(detection_labels).astype(bool)

    precision, recall = metrics.compute_precision_recall(
        scores, labels, num_total_gts)
    average_precision = metrics.compute_average_precision(precision, recall)

    return precision, recall, average_precision
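# Toy illustration of the final aggregation step above, assuming the TF Object
# Detection API metrics utilities; the scores/labels values here are made up.
import numpy as np
from object_detection.utils import metrics

scores = np.array([0.9, 0.8, 0.6, 0.3], dtype=float)
labels = np.array([True, False, True, False])
precision, recall = metrics.compute_precision_recall(scores, labels, num_gt=3)
average_precision = metrics.compute_average_precision(precision, recall)
print(precision, recall, average_precision)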
def non_max_suppression(boxlist,
                        max_output_size=10000,
                        iou_threshold=1.0,
                        score_threshold=-10.0):
    """Non maximum suppression.

    This op greedily selects a subset of detection bounding boxes, pruning
    away boxes that have high IOU (intersection over union) overlap (> thresh)
    with already selected boxes. In each iteration, the detected bounding box
    with highest score in the available pool is selected.

    Args:
      boxlist: BoxList holding N boxes. Must contain a 'scores' field
        representing detection scores. All scores belong to the same class.
      max_output_size: maximum number of retained boxes
      iou_threshold: intersection over union threshold.
      score_threshold: minimum score threshold. Remove the boxes with scores
        less than this value. Default value is set to -10. A very low threshold
        to pass pretty much all the boxes, unless the user sets a different
        score threshold.

    Returns:
      a BoxList holding M boxes where M <= max_output_size

    Raises:
      ValueError: if 'scores' field does not exist
      ValueError: if threshold is not in [0, 1]
      ValueError: if max_output_size < 0
    """
    if not boxlist.has_field('scores'):
        raise ValueError('Field scores does not exist')
    if iou_threshold < 0. or iou_threshold > 1.0:
        raise ValueError('IOU threshold must be in [0, 1]')
    if max_output_size < 0:
        raise ValueError('max_output_size must be bigger than 0.')

    boxlist = filter_scores_greater_than(boxlist, score_threshold)
    if boxlist.num_boxes() == 0:
        return boxlist

    boxlist = sort_by_field(boxlist, 'scores')

    # Prevent further computation if NMS is disabled.
    if iou_threshold == 1.0:
        if boxlist.num_boxes() > max_output_size:
            selected_indices = np.arange(max_output_size)
            return gather(boxlist, selected_indices)
        else:
            return boxlist

    boxes = boxlist.get()
    num_boxes = boxlist.num_boxes()
    # is_index_valid is True only for all remaining valid boxes,
    is_index_valid = np.full(num_boxes, 1, dtype=bool)
    selected_indices = []
    num_output = 0
    for i in range(num_boxes):
        if num_output < max_output_size:
            if is_index_valid[i]:
                num_output += 1
                selected_indices.append(i)
                is_index_valid[i] = False
                valid_indices = np.where(is_index_valid)[0]
                if valid_indices.size == 0:
                    break
                intersect_over_union = np_box_ops.iou(
                    np.expand_dims(boxes[i, :], axis=0), boxes[valid_indices, :])
                intersect_over_union = np.squeeze(intersect_over_union, axis=0)
                is_index_valid[valid_indices] = np.logical_and(
                    is_index_valid[valid_indices],
                    intersect_over_union <= iou_threshold)
    return gather(boxlist, np.array(selected_indices))
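# Minimal usage sketch, assuming this function lives alongside the TF Object
# Detection API's numpy BoxList helpers (np_box_list, filter_scores_greater_than,
# sort_by_field, gather) referenced above.
import numpy as np
from object_detection.utils import np_box_list

boxlist = np_box_list.BoxList(
    np.array([[0.0, 0.0, 10.0, 10.0],
              [1.0, 1.0, 11.0, 11.0],
              [20.0, 20.0, 30.0, 30.0]], dtype=np.float32))
boxlist.add_field('scores', np.array([0.9, 0.8, 0.7], dtype=np.float32))

# The two heavily overlapping boxes collapse to one; the distant box survives.
kept = non_max_suppression(boxlist, max_output_size=10,
                           iou_threshold=0.5, score_threshold=0.1)
print(kept.num_boxes())  # 2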
def select_TCFP_per_video(dataset, videos, data_dir, active_set, detections):

    # Selector configuration
    threshold_track = 0.7
    num_frames_to_track = 3

    # Tracker configuration
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # Candidates are verified frames that aren't close to already labeled frames
    aug_active_set = augment_active_set(dataset, videos, active_set, num_neighbors=5)
    unlabeled_set = [
        f['idx'] for f in dataset if f['idx'] not in aug_active_set
    ]
    #unlabeled_set = [i for i in range(len(dataset)) if i not in active_set]

    total_frames = len(
        [f for f in dataset if f['idx'] in unlabeled_set and f['verified']])
    overall_frame_counter = 0

    # ARE DETECTIONS NMSed?
    detected_boxes = detections['boxes']
    detected_scores = detections['scores']
    detected_labels = detections['labels']

    indices = []
    elapsed_time = []

    # Get only top detections
    for i in range(len(detected_boxes)):
        detected_boxes[i], detected_scores[i], detected_labels[i] = filter_detections(
            detected_boxes[i], detected_scores[i], detected_labels[i])

    #gt_boxes = groundtruths['boxes']

    for v in videos:

        video_dir = os.path.join(data_dir, 'Data', 'VID', 'train', v)

        # Select frames in current video (even those with wrong GTs)
        frames = [[f['idx'], f['filename'], f['verified']]
                  for f in dataset if f['video'] == v]

        # Get maximum index of frames in video
        idx_all_frames_video = [f[0] for f in frames]
        max_frame = np.max(idx_all_frames_video)

        # Get only those that are not labeled and verified --> pick from these
        frames_unlabeled = [
            f for f in frames if f[0] in unlabeled_set and f[2]
        ]

        if len(frames_unlabeled) > 0:

            frame_counter = 0
            frame_list_video = []
            pos_x_video = []
            pos_y_video = []
            target_w_video = []
            target_h_video = []
            num_good_dets_video = []
            detections_neighbors_video = []

            for fu in frames_unlabeled:

                idx_frame_video = idx_all_frames_video.index(fu[0])

                frame_counter += 1
                overall_frame_counter += 1
                #print("Processing frame {}/{} with total idx:{}, video idx {}".format(frame_counter+1, len(frames_unlabeled), fu[0], idx_frame_video))
                print(
                    "Adding information about frame in video: {}/{}, overall: {}/{}"
                    .format(frame_counter, len(frames_unlabeled),
                            overall_frame_counter, total_frames))

                # ASSUMPTION: for TCFP, we have detections for the whole dataset

                # Get boxes for current frame
                boxes_frame = detected_boxes[fu[0]]
                scores_frame = detected_scores[fu[0]]
                labels_frame = detected_labels[fu[0]]
                #gt_frame = gt_boxes[fu[0]]

                ## Visualization of frame's GT and detections
                #curr_im = Image.open(os.path.join(video_dir, frames[idx_frame_video][1]))
                #im_w, im_h = curr_im.size
                #vis_utils.draw_bounding_boxes_on_image(curr_im, normalize_box(gt_frame, im_w, im_h))
                #vis_utils.draw_bounding_boxes_on_image(curr_im, normalize_box(boxes_frame[:50, :], im_w, im_h), color='green')
                #curr_im.show()

                num_good_dets = labels_frame.shape[0]
                num_good_dets_video.append(num_good_dets)

                for idx_det in range(num_good_dets):

                    ###### Common part for forward and backward tracking
                    # Convert [y x y x] to [y x w h]
                    curr_box = convert_boxes_wh(boxes_frame[idx_det])
                    pos_x, pos_y, target_w, target_h = region_to_bbox(curr_box)

                    # Append them twice, forward and backward
                    pos_x_video.append(pos_x)
                    pos_x_video.append(pos_x)
                    pos_y_video.append(pos_y)
                    pos_y_video.append(pos_y)
                    target_w_video.append(target_w)
                    target_w_video.append(target_w)
                    target_h_video.append(target_h)
                    target_h_video.append(target_h)

                    ###### Forward part
                    detections_neighbors = []
                    frame_list = [
                        os.path.join(video_dir, frames[idx_frame_video][1])
                    ]
                    # I can't do this with list comprehension for some reason
                    #frame_list = [frames[i] for i in range(idx_frame_video+1, idx_frame_video+4) if frames[i] in frames]
                    for t in range(1, num_frames_to_track + 1):
                        idx_neighbor = idx_frame_video + t
                        # Check if neighbor still in video
                        if idx_neighbor < len(frames):
                            frame_list.append(
                                os.path.join(video_dir, frames[idx_neighbor][1]))
                            # Take only those of the current class
                            detections_neighbors.append(
                                detected_boxes[fu[0] + t][
                                    detected_labels[fu[0] + t] == labels_frame[idx_det]])
                    frame_list_video.append(frame_list)
                    detections_neighbors_video.append(detections_neighbors)

                    #bboxes, speed = tracker(hp, run, design, frame_list, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_z, scores, 1)

                    ###### Backward part
                    detections_neighbors = []
                    frame_list = [
                        os.path.join(video_dir, frames[idx_frame_video][1])
                    ]
                    for t in range(1, num_frames_to_track + 1):
                        idx_neighbor = idx_frame_video - t
                        if idx_neighbor >= 0:
                            frame_list.append(
                                os.path.join(video_dir, frames[idx_neighbor][1]))
                            # Take only those of the current class
                            detections_neighbors.append(
                                detected_boxes[fu[0] - t][
                                    detected_labels[fu[0] - t] == labels_frame[idx_det]])
                    frame_list_video.append(frame_list)
                    detections_neighbors_video.append(detections_neighbors)

                    #bboxes, speed = tracker(hp, run, design, frame_list, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_z, scores, 1)

            # Track ALL frames and all detections in video with one call
            bboxes_video, elapsed_time_video = tracker_full_video(
                hp, run, design, frame_list_video, pos_x_video, pos_y_video,
                target_w_video, target_h_video, final_score_sz, env)
            elapsed_time.append(elapsed_time_video)

            # Computation of TC-FP score
            frame_counter = 0
            tc_scores = np.zeros(len(frames_unlabeled))
            for fu in frames_unlabeled:
                num_good_dets = num_good_dets_video[frame_counter]
                tc_sum_frame = np.zeros(num_good_dets)
                tc_neigh_frame = np.zeros(num_good_dets)
                for idx_det in range(num_good_dets):
                    # Return and delete from list first element, going in the same order as before
                    bboxes = bboxes_video.pop(0)
                    detections_neighbors = detections_neighbors_video.pop(0)
                    frame_list = frame_list_video.pop(0)
                    for t in range(1, len(frame_list)):
                        # Visualize track and detections
                        #curr_im = Image.open(frame_list[t])
                        #im_w, im_h = curr_im.size
                        #vis_utils.draw_bounding_boxes_on_image(curr_im, normalize_box(convert_boxes_xy(bboxes[t]).reshape((1, 4)), im_w, im_h))
                        #vis_utils.draw_bounding_boxes_on_image(curr_im, normalize_box(detections_neighbors[t-1], im_w, im_h), color='green')
                        #curr_im.show()
                        tc_neigh_frame[idx_det] += 1
                        # Check if tracked detection matches any detection in neighbor frame, if any
                        if len(detections_neighbors[t - 1]) > 0:
                            ovTr = np_box_ops.iou(
                                convert_boxes_xy(bboxes[t]).reshape((1, 4)),
                                detections_neighbors[t - 1])
                            # Increment score if it does
                            if np.max(ovTr) > threshold_track:
                                tc_sum_frame[idx_det] += 1

                    bboxes = bboxes_video.pop(0)
                    detections_neighbors = detections_neighbors_video.pop(0)
                    frame_list = frame_list_video.pop(0)
                    for t in range(1, len(frame_list)):
                        ## Visualize track and detections
                        #curr_im = Image.open(frame_list[t])
                        #im_w, im_h = curr_im.size
                        #vis_utils.draw_bounding_boxes_on_image(curr_im, normalize_box(convert_boxes_xy(bboxes[t]).reshape((1, 4)), im_w, im_h))
                        #vis_utils.draw_bounding_boxes_on_image(curr_im, normalize_box(detections_neighbors[t-1], im_w, im_h), color='green')
                        #curr_im.show()
                        tc_neigh_frame[idx_det] += 1
                        # Check if tracked detection matches any detection in neighbor frame, if any
                        if len(detections_neighbors[t - 1]) > 0:
                            ovTr = np_box_ops.iou(
                                convert_boxes_xy(bboxes[t]).reshape((1, 4)),
                                detections_neighbors[t - 1])
                            # Increment score if it does
                            if np.max(ovTr) > threshold_track:
                                tc_sum_frame[idx_det] += 1

                # Compute and save mean score per frame
                if num_good_dets > 0:
                    # Score is normalized count
                    tc_scores[frame_counter] = np.mean(tc_sum_frame / tc_neigh_frame)
                else:
                    # Frames with no detections don't have TCFP score (inf so they aren't taken)
                    tc_scores[frame_counter] = np.inf

                frame_counter += 1

            # Select frames that achieve minimum
            idx_min = np.where(tc_scores == np.min(tc_scores))
            idx_sel = np.random.choice(idx_min[0])
            indices.append(frames_unlabeled[idx_sel][0])

            print("Current average elapsed time per video: {:.2f}".format(
                np.mean(elapsed_time)))

    return indices
def non_max_suppression(boxlist,
                        max_output_size=10000,
                        iou_threshold=1.0,
                        score_threshold=-10.0):
    """Non maximum suppression.

    This op greedily selects a subset of detection bounding boxes, pruning
    away boxes that have high IOU (intersection over union) overlap (> thresh)
    with already selected boxes. In each iteration, the detected bounding box
    with highest score in the available pool is selected.

    Args:
      boxlist: BoxList holding N boxes. Must contain a 'scores' field
        representing detection scores. All scores belong to the same class.
      max_output_size: maximum number of retained boxes
      iou_threshold: intersection over union threshold.
      score_threshold: minimum score threshold. Remove the boxes with scores
        less than this value. Default value is set to -10. A very low threshold
        to pass pretty much all the boxes, unless the user sets a different
        score threshold.

    Returns:
      a BoxList holding M boxes where M <= max_output_size

    Raises:
      ValueError: if 'scores' field does not exist
      ValueError: if threshold is not in [0, 1]
      ValueError: if max_output_size < 0
    """
    if not boxlist.has_field('scores'):
        raise ValueError('Field scores does not exist')
    if iou_threshold < 0. or iou_threshold > 1.0:
        raise ValueError('IOU threshold must be in [0, 1]')
    if max_output_size < 0:
        raise ValueError('max_output_size must be bigger than 0.')

    boxlist = filter_scores_greater_than(boxlist, score_threshold)
    if boxlist.num_boxes() == 0:
        return boxlist

    boxlist = sort_by_field(boxlist, 'scores')

    # Prevent further computation if NMS is disabled.
    if iou_threshold == 1.0:
        if boxlist.num_boxes() > max_output_size:
            selected_indices = np.arange(max_output_size)
            return gather(boxlist, selected_indices)
        else:
            return boxlist

    boxes = boxlist.get()
    num_boxes = boxlist.num_boxes()
    # is_index_valid is True only for all remaining valid boxes,
    is_index_valid = np.full(num_boxes, 1, dtype=bool)
    selected_indices = []
    num_output = 0
    for i in xrange(num_boxes):
        if num_output < max_output_size:
            if is_index_valid[i]:
                num_output += 1
                selected_indices.append(i)
                is_index_valid[i] = False
                valid_indices = np.where(is_index_valid)[0]
                if valid_indices.size == 0:
                    break
                intersect_over_union = np_box_ops.iou(
                    np.expand_dims(boxes[i, :], axis=0), boxes[valid_indices, :])
                intersect_over_union = np.squeeze(intersect_over_union, axis=0)
                is_index_valid[valid_indices] = np.logical_and(
                    is_index_valid[valid_indices],
                    intersect_over_union <= iou_threshold)
    return gather(boxlist, np.array(selected_indices))
def soft_non_max_suppression(boxlist,
                             max_output_size=10000,
                             iou_threshold=1.0,
                             score_threshold=-10.0,
                             nms_type=1,
                             sigma=0.5):
    """Soft Non maximum suppression.

    This is an extended version of traditional NMS named soft-NMS
    Bodla, Navaneeth, et al. "Improving Object Detection With One Line of Code."
    arXiv preprint arXiv:1704.04503 (2017).

    Args:
      boxlist: BoxList holding N boxes. Must contain a 'scores' field
        representing detection scores. All scores belong to the same class.
      max_output_size: maximum number of retained boxes
      iou_threshold: intersection over union threshold.
      score_threshold: minimum score threshold. Remove the boxes with scores
        less than this value. Default value is set to -10. A very low threshold
        to pass pretty much all the boxes, unless the user sets a different
        score threshold.
      nms_type: 1 (nms), 2 (soft-nms linear), 3 (soft-nms gaussian)
      sigma: parameter of the gaussian weight function

    Returns:
      a BoxList holding M boxes where M <= max_output_size

    Raises:
      ValueError: if not 1 <= nms_type <= 3
      ValueError: if 'scores' field does not exist
      ValueError: if threshold is not in [0, 1]
      ValueError: if max_output_size < 0
    """

    def nms_weight_function(intersect_over_union, nms_type, iou_threshold, sigma):
        if nms_type == 1:
            # traditional weight
            return (intersect_over_union < iou_threshold).astype(np.float32)
        elif nms_type == 2:
            # soft-NMS linear version
            intersect_over_union[intersect_over_union < iou_threshold] = 0
            return 1 - intersect_over_union
        elif nms_type == 3:
            # soft-NMS gaussian version
            return np.exp(-np.square(intersect_over_union) / sigma)
        else:
            raise ValueError('nms_type range (1~3)')

    if not boxlist.has_field('scores'):
        raise ValueError('Field scores does not exist')
    if iou_threshold < 0. or iou_threshold > 1.0:
        raise ValueError('IOU threshold must be in [0, 1]')
    if max_output_size < 0:
        raise ValueError('max_output_size must be bigger than 0.')

    boxlist = filter_scores_greater_than(boxlist, score_threshold)
    if boxlist.num_boxes() == 0:
        return boxlist

    boxlist = sort_by_field(boxlist, 'scores')

    # Prevent further computation if NMS is disabled.
    if iou_threshold == 1.0:
        if boxlist.num_boxes() > max_output_size:
            selected_indices = np.arange(max_output_size)
            return gather(boxlist, selected_indices)
        else:
            return boxlist

    boxes = boxlist.get()
    scores = boxlist.get_field('scores')
    num_boxes = boxlist.num_boxes()
    # is_index_valid is True only for all remaining valid boxes,
    is_index_valid = np.full(num_boxes, 1, dtype=bool)
    num_output = 0
    for i in xrange(num_boxes):
        if num_output < max_output_size:
            # ind_max = np.argmax(scores * is_index_valid.astype(np.int))
            # score_max = scores[ind_max]
            # if score_max <= score_threshold:
            #     break
            ind_max = -1
            score_max = score_threshold
            for j in xrange(num_boxes):
                if is_index_valid[j] == True and scores[j] > score_max:
                    ind_max = j
                    score_max = scores[j]
            if score_max > score_threshold:
                num_output += 1
                is_index_valid[ind_max] = False
                valid_indices = np.where(is_index_valid)[0]
                if valid_indices.size == 0:
                    break
                intersect_over_union = np_box_ops.iou(
                    np.expand_dims(boxes[ind_max, :], axis=0),
                    boxes[valid_indices, :])
                intersect_over_union = np.squeeze(intersect_over_union, axis=0)
                scores[valid_indices] = scores[valid_indices] * nms_weight_function(
                    intersect_over_union, nms_type, iou_threshold, sigma)
            else:
                break

    boxlist = filter_scores_greater_than(boxlist, max(0.0, score_threshold))
    boxlist = sort_by_field(boxlist, 'scores')
    if boxlist.num_boxes() > max_output_size:
        selected_indices = np.arange(max_output_size)
        return gather(boxlist, selected_indices)
    else:
        return boxlist
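# Standalone illustration of the three decay functions used above; the linear
# branch is written with np.where here but matches the in-place version in
# nms_weight_function. IoU values are arbitrary.
import numpy as np

iou_vals = np.array([0.2, 0.5, 0.9], dtype=np.float32)
iou_threshold, sigma = 0.5, 0.5

hard = (iou_vals < iou_threshold).astype(np.float32)              # [1., 0., 0.]
linear = np.where(iou_vals < iou_threshold, 1.0, 1.0 - iou_vals)  # [1., 0.5, 0.1]
gaussian = np.exp(-np.square(iou_vals) / sigma)                   # [~0.92, ~0.61, ~0.20]
print(hard, linear, gaussian)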
def calc_iou_with_previous(image_time, bbox_iou_threshold, camera_id, region_id,
                           bbox_stack_list, bbox_push_list, bbox_array):
    '''
    use IOU algorithm to compare current with previous
    stack is all bboxes from previous inferences (where something was detected)
    size = DEDUP_DEPTH

    SIMPLE - 1 object detected
    consider:
    b1 = np.array([[0.10, 0.20, 0.30, 0.40],
                   [0.12, 0.22, 0.32, 0.42],
                   [0.08, 0.18, 0.28, 0.38],
                   [0.4, 0.6, 0.4, 0.6],
                   [0.10, 0.2, 0.3, 0.4]])
    b2 = np.array([[0.10, 0.20, 0.30, 0.40]])
    # iou = [[1], [0.68], [0.68], [0.], [1]]   average = 0.67
    match_rates = iou(b1, b2).reshape(-1,)
    matches = np.argwhere(match_rates > 0.8).size

    COMPLEX - 2 objects detected (history = 2 objects, 1 object, 2 objects)
    b1 = np.array([[0.1, 0.11, 0.2, 0.22],
                   [0.3, 0.33, 0.4, 0.44],
                   [0.1, 0.11, 0.2, 0.22],
                   [0.1, 0.11, 0.2, 0.22],
                   [0.3, 0.33, 0.4, 0.44]])
    b2 = [[0.1, 0.11, 0.2, 0.22],
          [0.3, 0.33, 0.4, 0.44]]
    match_rates = iou(b1, b2)
    np.count_nonzero(match_rates[:, 0] > 0.8)
    np.count_nonzero(match_rates[:, 1] > 0.8)

    returns = IOU
    '''
    # get the bbox_stack
    bbox_stack = bbox_stack_list[region_id]  # should be [4, depth]
    bbox = bbox_array.reshape(-1, 4)

    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} ---- BEFORE ----')
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} Stack Before: {bbox_stack.shape}')
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} stack: {bbox_stack.tolist()}')
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} bbox: {bbox}')
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} push list BEFORE: {bbox_push_list[region_id]}')

    match_rates = iou(bbox_stack, bbox)
    det_obj_count = bbox.shape[0]
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} match_rates: {match_rates.tolist()}')

    # for the number of detected objects (1 object = 1 bbox)
    # how many matches?
    match_counts = []
    for i in range(det_obj_count):
        object_match_count = np.count_nonzero(match_rates[:, i] > bbox_iou_threshold)
        match_counts.append(object_match_count)

    # push match count to the bbox push list (so you know how many to pop off)
    # push list is a nested list - 1 list per region
    bbox_push_list[region_id].append(det_obj_count)   # push the object count into the list
    objects_to_remove = bbox_push_list[region_id][0]  # how many rows to remove from bbox_stack
    # zero based index so subtract 1
    bbox_push_list[region_id].pop(0)                  # pop the first

    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} ----- AFTER -----')
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} -- bbox_stack {bbox_stack.shape}')
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} -- bbox {bbox.shape}')
    bbox_stack_list[region_id] = np.append(bbox_stack, bbox, 0)
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} -- box_stack_list-appended: {region_id} {bbox_stack_list[region_id].shape}')
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} Stack BEFORE delete: {bbox_stack_list[region_id].tolist()}')
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} Slice: {objects_to_remove}')
    bbox_stack_list[region_id] = np.delete(bbox_stack_list[region_id],
                                           slice(0, objects_to_remove), 0)
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} Match Counts: {match_counts}')
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} Stack After: {bbox_stack_list[region_id].tolist()}')
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} Push List: {bbox_push_list[region_id]}')
    log.debug(f'id new -- cam#: {camera_id} reg# {region_id} {image_time} -- match counts: {match_counts}')

    return match_counts